# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import platform
import yaml
import time
import datetime
import paddle
import paddle.distributed as dist
from tqdm import tqdm
from argparse import ArgumentParser, RawDescriptionHelpFormatter

from ppocr.utils.stats import TrainingStats
from ppocr.utils.save_load import save_model
from ppocr.utils.utility import print_dict, AverageMeter
from ppocr.utils.logging import get_logger
from ppocr.utils import profiler
from ppocr.data import build_dataloader

class ArgsParser(ArgumentParser):
    def __init__(self):
        super(ArgsParser, self).__init__(
            formatter_class=RawDescriptionHelpFormatter)
        self.add_argument("-c", "--config", help="configuration file to use")
        self.add_argument(
            "-o", "--opt", nargs='+', help="set configuration options")
        self.add_argument(
            '-p',
            '--profiler_options',
            type=str,
            default=None,
            help='The option of profiler, which should be in format ' \
                 '\"key1=value1;key2=value2;key3=value3\".'
        )

    def parse_args(self, argv=None):
        args = super(ArgsParser, self).parse_args(argv)
        assert args.config is not None, \
            "Please specify --config=configure_file_path."
        args.opt = self._parse_opt(args.opt)
        return args

    def _parse_opt(self, opts):
        config = {}
        if not opts:
            return config
        for s in opts:
            s = s.strip()
            k, v = s.split('=')
            config[k] = yaml.load(v, Loader=yaml.Loader)
        return config

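# Illustrative invocation (hypothetical paths, for reference only):
#   python tools/train.py -c configs/det/det_mv3_db.yml \
#       -o Global.epoch_num=10 Global.use_gpu=false
# Each "-o key=value" pair is parsed with yaml.load in _parse_opt, so values
# such as "false", "10" or "[1, 2]" become native Python types before merging.
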
					
						
def load_config(file_path):
    """
    Load config from yml/yaml file.
    Args:
        file_path (str): Path of the config file to be loaded.
    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
    config = yaml.load(open(file_path, 'rb'), Loader=yaml.Loader)
    return config


def merge_config(config, opts):
    """
    Merge config into global config.
    Args:
        config (dict): Config to be merged into.
        opts (dict): Options to merge, e.g. parsed from "-o key=value" pairs.
    Returns: global config
    """
    for key, value in opts.items():
        if "." not in key:
            if isinstance(value, dict) and key in config:
                config[key].update(value)
            else:
                config[key] = value
        else:
            sub_keys = key.split('.')
            assert (
                sub_keys[0] in config
            ), "the sub_keys can only be one of global_config: {}, but get: " \
               "{}, please check your running command".format(
                config.keys(), sub_keys[0])
            cur = config[sub_keys[0]]
            for idx, sub_key in enumerate(sub_keys[1:]):
                if idx == len(sub_keys) - 2:
                    cur[sub_key] = value
                else:
                    cur = cur[sub_key]
    return config

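# Illustrative behaviour of merge_config (made-up values):
#   config = {'Global': {'epoch_num': 500, 'use_gpu': True}}
#   merge_config(config, {'Global.epoch_num': 10, 'Optimizer': {'lr': 0.001}})
#   -> a dotted key walks into the nested dict: config['Global']['epoch_num'] == 10
#   -> a plain key is updated/added at the top level: config['Optimizer'] == {'lr': 0.001}
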
					
						
def check_gpu(use_gpu):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.
    """
    err = "Config use_gpu cannot be set as true while you are " \
          "using paddlepaddle cpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
          "\t2. Set use_gpu as false in config file to run " \
          "model on CPU"

    try:
        if use_gpu and not paddle.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception as e:
        pass


def check_xpu(use_xpu):
    """
    Log error and exit when set use_xpu=true in paddlepaddle
    cpu/gpu version.
    """
    err = "Config use_xpu cannot be set as true while you are " \
          "using paddlepaddle cpu/gpu version ! \nPlease try: \n" \
          "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
          "\t2. Set use_xpu as false in config file to run " \
          "model on CPU/GPU"

    try:
        if use_xpu and not paddle.is_compiled_with_xpu():
            print(err)
            sys.exit(1)
    except Exception as e:
        pass

def train(config,
          train_dataloader,
          valid_dataloader,
          device,
          model,
          loss_class,
          optimizer,
          lr_scheduler,
          post_process_class,
          eval_class,
          pre_best_model_dict,
          logger,
          vdl_writer=None,
          scaler=None):
    cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                   False)
    calc_epoch_interval = config['Global'].get('calc_epoch_interval', 1)
    log_smooth_window = config['Global']['log_smooth_window']
    epoch_num = config['Global']['epoch_num']
    print_batch_step = config['Global']['print_batch_step']
    eval_batch_step = config['Global']['eval_batch_step']
    profiler_options = config['profiler_options']

    global_step = 0
    if 'global_step' in pre_best_model_dict:
        global_step = pre_best_model_dict['global_step']
    start_eval_step = 0
    if type(eval_batch_step) == list and len(eval_batch_step) >= 2:
        start_eval_step = eval_batch_step[0]
        eval_batch_step = eval_batch_step[1]
        if len(valid_dataloader) == 0:
            logger.info(
                'No Images in eval dataset, evaluation during training ' \
                'will be disabled'
            )
            start_eval_step = 1e111
        logger.info(
            "During the training process, after the {}th iteration, " \
            "an evaluation is run every {} iterations".
            format(start_eval_step, eval_batch_step))
    save_epoch_step = config['Global']['save_epoch_step']
    save_model_dir = config['Global']['save_model_dir']
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)
    main_indicator = eval_class.main_indicator
    best_model_dict = {main_indicator: 0}
    best_model_dict.update(pre_best_model_dict)
    train_stats = TrainingStats(log_smooth_window, ['lr'])
    model_average = False
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
    extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
    extra_input = False
    if config['Architecture']['algorithm'] == 'Distillation':
        for key in config['Architecture']["Models"]:
            extra_input = extra_input or config['Architecture']['Models'][key][
                'algorithm'] in extra_input_models
    else:
        extra_input = config['Architecture']['algorithm'] in extra_input_models
    try:
        model_type = config['Architecture']['model_type']
    except:
        model_type = None

    algorithm = config['Architecture']['algorithm']

    start_epoch = best_model_dict[
        'start_epoch'] if 'start_epoch' in best_model_dict else 1

    total_samples = 0
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    reader_start = time.time()
    eta_meter = AverageMeter()

    max_iter = len(train_dataloader) - 1 if platform.system(
    ) == "Windows" else len(train_dataloader)

    for epoch in range(start_epoch, epoch_num + 1):
        if train_dataloader.dataset.need_reset:
            train_dataloader = build_dataloader(
                config, 'Train', device, logger, seed=epoch)
            max_iter = len(train_dataloader) - 1 if platform.system(
            ) == "Windows" else len(train_dataloader)
        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
            images = batch[0]
            if use_srn:
                model_average = True

            # use amp
            if scaler:
                with paddle.amp.auto_cast():
                    if model_type == 'table' or extra_input:
                        preds = model(images, data=batch[1:])
                    else:
                        preds = model(images)
            else:
                if model_type == 'table' or extra_input:
                    preds = model(images, data=batch[1:])
                elif model_type in ["kie", 'vqa']:
                    preds = model(batch)
                else:
                    preds = model(images)

            loss = loss_class(preds, batch)
            avg_loss = loss['loss']

            if scaler:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                avg_loss.backward()
                optimizer.step()
            optimizer.clear_grad()

            if cal_metric_during_train and epoch % calc_epoch_interval == 0:  # only rec and cls need
                batch = [item.numpy() for item in batch]
                if model_type in ['table', 'kie']:
                    eval_class(preds, batch)
                else:
                    if config['Loss']['name'] in ['MultiLoss', 'MultiLoss_v2'
                                                  ]:  # for multi head loss
                        post_result = post_process_class(
                            preds['ctc'], batch[1])  # for CTC head out
                    else:
                        post_result = post_process_class(preds, batch[1])
                    eval_class(post_result, batch)
                metric = eval_class.get_metric()
                train_stats.update(metric)

            train_batch_time = time.time() - reader_start
            train_batch_cost += train_batch_time
            eta_meter.update(train_batch_time)
            global_step += 1
            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            # logger and visualdl
            stats = {k: v.numpy().mean() for k, v in loss.items()}
            stats['lr'] = lr
            train_stats.update(stats)

            if vdl_writer is not None and dist.get_rank() == 0:
                for k, v in train_stats.get().items():
                    vdl_writer.add_scalar('TRAIN/{}'.format(k), v, global_step)
                vdl_writer.add_scalar('TRAIN/lr', lr, global_step)

            if dist.get_rank() == 0 and (
                (global_step > 0 and global_step % print_batch_step == 0) or
                (idx >= len(train_dataloader) - 1)):
                logs = train_stats.log()

                eta_sec = ((epoch_num + 1 - epoch) * \
                    len(train_dataloader) - idx - 1) * eta_meter.avg
                eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \
                       '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \
                       'ips: {:.5f} samples/s, eta: {}'.format(
                    epoch, epoch_num, global_step, logs,
                    train_reader_cost / print_batch_step,
                    train_batch_cost / print_batch_step,
                    total_samples / print_batch_step,
                    total_samples / train_batch_cost, eta_sec_format)
                logger.info(strs)

                total_samples = 0
                train_reader_cost = 0.0
                train_batch_cost = 0.0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 \
                    and dist.get_rank() == 0:
                if model_average:
                    Model_Average = paddle.incubate.optimizer.ModelAverage(
                        0.15,
                        parameters=model.parameters(),
                        min_average_window=10000,
                        max_average_window=15625)
                    Model_Average.apply()
                cur_metric = eval(
                    model,
                    valid_dataloader,
                    post_process_class,
                    eval_class,
                    model_type,
                    extra_input=extra_input)
                cur_metric_str = 'cur metric, {}'.format(', '.join(
                    ['{}: {}'.format(k, v) for k, v in cur_metric.items()]))
                logger.info(cur_metric_str)

                # logger metric
                if vdl_writer is not None:
                    for k, v in cur_metric.items():
                        if isinstance(v, (float, int)):
                            vdl_writer.add_scalar('EVAL/{}'.format(k),
                                                  cur_metric[k], global_step)
                if cur_metric[main_indicator] >= best_model_dict[
                        main_indicator]:
                    best_model_dict.update(cur_metric)
                    best_model_dict['best_epoch'] = epoch
                    save_model(
                        model,
                        optimizer,
                        save_model_dir,
                        logger,
                        config,
                        is_best=True,
                        prefix='best_accuracy',
                        best_model_dict=best_model_dict,
                        epoch=epoch,
                        global_step=global_step)
                best_str = 'best metric, {}'.format(', '.join([
                    '{}: {}'.format(k, v) for k, v in best_model_dict.items()
                ]))
                logger.info(best_str)
                # logger best metric
                if vdl_writer is not None:
                    vdl_writer.add_scalar('EVAL/best_{}'.format(main_indicator),
                                          best_model_dict[main_indicator],
                                          global_step)

            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='latest',
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)
        if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0:
            save_model(
                model,
                optimizer,
                save_model_dir,
                logger,
                config,
                is_best=False,
                prefix='iter_epoch_{}'.format(epoch),
                best_model_dict=best_model_dict,
                epoch=epoch,
                global_step=global_step)
    best_str = 'best metric, {}'.format(', '.join(
        ['{}: {}'.format(k, v) for k, v in best_model_dict.items()]))
    logger.info(best_str)
    if dist.get_rank() == 0 and vdl_writer is not None:
        vdl_writer.close()
    return

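# Note on mixed precision: when a `scaler` is passed to train(), the forward
# pass runs under paddle.amp.auto_cast() and the loss is scaled before backward
# (scaler.scale(avg_loss).backward(); scaler.minimize(optimizer, ...)). A caller
# would typically create it roughly like this (illustrative sketch only):
#   scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
# With scaler=None, plain FP32 training is used instead.
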
def eval(model,
         valid_dataloader,
         post_process_class,
         eval_class,
         model_type=None,
         extra_input=False):
    model.eval()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(
            total=len(valid_dataloader),
            desc='eval model:',
            position=0,
            leave=True)
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            start = time.time()
            if model_type == 'table' or extra_input:
                preds = model(images, data=batch[1:])
            elif model_type in ["kie", 'vqa']:
                preds = model(batch)
            else:
                preds = model(images)

            batch_numpy = []
            for item in batch:
                if isinstance(item, paddle.Tensor):
                    batch_numpy.append(item.numpy())
                else:
                    batch_numpy.append(item)
            # Obtain usable results from post-processing methods
            total_time += time.time() - start
            # Evaluate the results of the current batch
            if model_type in ['table', 'kie']:
                eval_class(preds, batch_numpy)
            elif model_type in ['vqa']:
                post_result = post_process_class(preds, batch_numpy)
                eval_class(post_result, batch_numpy)
            else:
                post_result = post_process_class(preds, batch_numpy[1])
                eval_class(post_result, batch_numpy)

            pbar.update(1)
            total_frame += len(images)
        # Get final metric, e.g. acc or hmean
        metric = eval_class.get_metric()

    pbar.close()
    model.train()
    metric['fps'] = total_frame / total_time
    return metric

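# update_center (below) keeps a running mean of the per-time-step feature for
# each predicted character index, using only samples whose predicted text
# matches the label. The update is the standard incremental mean; e.g. with a
# stored (center, count) of (0.2, 3) and a new feature 0.6 (made-up numbers):
#   new_center = (0.2 * 3 + 0.6) / (3 + 1) = 0.3, and the count becomes 4.
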
def update_center(char_center, post_result, preds):
    result, label = post_result
    feats, logits = preds
    logits = paddle.argmax(logits, axis=-1)
    feats = feats.numpy()
    logits = logits.numpy()

    for idx_sample in range(len(label)):
        if result[idx_sample][0] == label[idx_sample][0]:
            feat = feats[idx_sample]
            logit = logits[idx_sample]
            for idx_time in range(len(logit)):
                index = logit[idx_time]
                if index in char_center.keys():
                    char_center[index][0] = (
                        char_center[index][0] * char_center[index][1] +
                        feat[idx_time]) / (char_center[index][1] + 1)
                    char_center[index][1] += 1
                else:
                    char_center[index] = [feat[idx_time], 1]
    return char_center

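# get_center (below) runs the model over a dataloader, aggregates per-character
# feature centers via update_center, and finally drops the counts so only the
# mean feature per character index is returned (presumably used as class
# centers for center-loss style training of recognition models).
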
					
						
def get_center(model, eval_dataloader, post_process_class):
    pbar = tqdm(total=len(eval_dataloader), desc='get center:')
    max_iter = len(eval_dataloader) - 1 if platform.system(
    ) == "Windows" else len(eval_dataloader)
    char_center = dict()
    for idx, batch in enumerate(eval_dataloader):
        if idx >= max_iter:
            break
        images = batch[0]
        start = time.time()
        preds = model(images)

        batch = [item.numpy() for item in batch]
        # Obtain usable results from post-processing methods
        post_result = post_process_class(preds, batch[1])

        # update char_center
        char_center = update_center(char_center, post_result, preds)
        pbar.update(1)

    pbar.close()
    for key in char_center.keys():
        char_center[key] = char_center[key][0]
    return char_center

def preprocess(is_train=False):
    FLAGS = ArgsParser().parse_args()
    profiler_options = FLAGS.profiler_options
    config = load_config(FLAGS.config)
    config = merge_config(config, FLAGS.opt)
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    config = merge_config(config, profile_dic)

    if is_train:
        # save_config
        save_model_dir = config['Global']['save_model_dir']
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
            yaml.dump(
                dict(config), f, default_flow_style=False, sort_keys=False)
        log_file = '{}/train.log'.format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    # check if set use_xpu=True in paddlepaddle cpu/gpu version
    use_xpu = False
    if 'use_xpu' in config['Global']:
        use_xpu = config['Global']['use_xpu']
    check_xpu(use_xpu)

    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'PREN', 'FCE', 'SVTR'
    ]

    device = 'cpu'
    if use_gpu:
        device = 'gpu:{}'.format(dist.ParallelEnv().dev_id)
    if use_xpu:
        device = 'xpu'
    device = paddle.set_device(device)

    config['Global']['distributed'] = dist.get_world_size() != 1

    if config['Global']['use_visualdl'] and dist.get_rank() == 0:
        from visualdl import LogWriter
        save_model_dir = config['Global']['save_model_dir']
        vdl_writer_path = '{}/vdl/'.format(save_model_dir)
        os.makedirs(vdl_writer_path, exist_ok=True)
        vdl_writer = LogWriter(logdir=vdl_writer_path)
    else:
        vdl_writer = None
    print_dict(config, logger)
    logger.info('train with paddle {} and device {}'.format(paddle.__version__,
                                                            device))
    return config, device, logger, vdl_writer
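

# Sketch of how a training entry point would typically wire these helpers
# together (simplified and hypothetical; the builders for model, loss,
# optimizer, post-process and metric live elsewhere in the repo):
#   config, device, logger, vdl_writer = preprocess(is_train=True)
#   train_dataloader = build_dataloader(config, 'Train', device, logger)
#   valid_dataloader = build_dataloader(config, 'Eval', device, logger)
#   ... build model / loss_class / optimizer / lr_scheduler /
#       post_process_class / eval_class ...
#   train(config, train_dataloader, valid_dataloader, device, model,
#         loss_class, optimizer, lr_scheduler, post_process_class,
#         eval_class, pre_best_model_dict={}, logger=logger,
#         vdl_writer=vdl_writer)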