| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. | 
					
						
							|  |  |  |  | # | 
					
						
							|  |  |  |  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  |  | # you may not use this file except in compliance with the License. | 
					
						
							|  |  |  |  | # You may obtain a copy of the License at | 
					
						
							|  |  |  |  | # | 
					
						
							|  |  |  |  | #     http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  |  | # | 
					
						
							|  |  |  |  | # Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  |  | # distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  |  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  |  | # See the License for the specific language governing permissions and | 
					
						
							|  |  |  |  | # limitations under the License. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import os | 
					
						
							|  |  |  |  | import sys | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | __dir__ = os.path.dirname(__file__) | 
					
						
							|  |  |  |  | sys.path.append(os.path.join(__dir__, '')) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | import cv2 | 
					
						
							|  |  |  |  | import numpy as np | 
					
						
							|  |  |  |  | from pathlib import Path | 
					
						
							|  |  |  |  | import tarfile | 
					
						
							|  |  |  |  | import requests | 
					
						
							|  |  |  |  | from tqdm import tqdm | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | from tools.infer import predict_system | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  | from ppocr.utils.logging import get_logger | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  | logger = get_logger() | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  | from ppocr.utils.utility import check_and_read_gif, get_image_file_list | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | __all__ = ['PaddleOCR'] | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
# Download locations of the default inference models.
#   'det' / 'cls': tarball URL of the detection / angle-classification model.
#   'rec': one entry per supported language code (these keys are the values
#          PaddleOCR accepts for `lang`), each holding the recognition model
#          tarball 'url' and the 'dict_path' of its character dictionary.
#          PaddleOCR.__init__ joins 'dict_path' onto this file's directory.
model_urls = {
    'det':
    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
    'rec': {
        'ch': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
        },
        'en': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/en_dict.txt'
        },
        'french': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/french_dict.txt'
        },
        'german': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/german_dict.txt'
        },
        'korean': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/korean_dict.txt'
        },
        'japan': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/japan_dict.txt'
        }
    },
    'cls':
    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
}
					
						
							|  |  |  |  | 
 | 
					
						
# Detection algorithms accepted for the `det_algorithm` parameter.
SUPPORT_DET_MODEL = ['DB']
# Model release version; PaddleOCR.__init__ formats it into the default
# model cache sub-directories (e.g. '<BASE_DIR>/2.0/det').
VERSION = 2.0
# Recognition algorithms accepted for the `rec_algorithm` parameter.
SUPPORT_REC_MODEL = ['CRNN']
# Root directory (under the user's home) for the default model directories.
BASE_DIR = os.path.expanduser("~/.paddleocr/")
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def download_with_progressbar(url, save_path): | 
					
						
							|  |  |  |  |     response = requests.get(url, stream=True) | 
					
						
							|  |  |  |  |     total_size_in_bytes = int(response.headers.get('content-length', 0)) | 
					
						
							|  |  |  |  |     block_size = 1024  # 1 Kibibyte | 
					
						
							|  |  |  |  |     progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) | 
					
						
							|  |  |  |  |     with open(save_path, 'wb') as file: | 
					
						
							|  |  |  |  |         for data in response.iter_content(block_size): | 
					
						
							|  |  |  |  |             progress_bar.update(len(data)) | 
					
						
							|  |  |  |  |             file.write(data) | 
					
						
							|  |  |  |  |     progress_bar.close() | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |     if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes: | 
					
						
							|  |  |  |  |         logger.error("Something went wrong while downloading models") | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         sys.exit(0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  | def maybe_download(model_storage_directory, url): | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |     # using custom model | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |     tar_file_name_list = [ | 
					
						
							|  |  |  |  |         'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel' | 
					
						
							|  |  |  |  |     ] | 
					
						
							|  |  |  |  |     if not os.path.exists( | 
					
						
							|  |  |  |  |             os.path.join(model_storage_directory, 'inference.pdiparams') | 
					
						
							|  |  |  |  |     ) or not os.path.exists( | 
					
						
							|  |  |  |  |             os.path.join(model_storage_directory, 'inference.pdmodel')): | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |         tmp_path = os.path.join(model_storage_directory, url.split('/')[-1]) | 
					
						
							|  |  |  |  |         print('download {} to {}'.format(url, tmp_path)) | 
					
						
							|  |  |  |  |         os.makedirs(model_storage_directory, exist_ok=True) | 
					
						
							|  |  |  |  |         download_with_progressbar(url, tmp_path) | 
					
						
							|  |  |  |  |         with tarfile.open(tmp_path, 'r') as tarObj: | 
					
						
							|  |  |  |  |             for member in tarObj.getmembers(): | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |                 filename = None | 
					
						
							|  |  |  |  |                 for tar_file_name in tar_file_name_list: | 
					
						
							|  |  |  |  |                     if tar_file_name in member.name: | 
					
						
							|  |  |  |  |                         filename = tar_file_name | 
					
						
							|  |  |  |  |                 if filename is None: | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |                     continue | 
					
						
							|  |  |  |  |                 file = tarObj.extractfile(member) | 
					
						
							|  |  |  |  |                 with open( | 
					
						
							|  |  |  |  |                         os.path.join(model_storage_directory, filename), | 
					
						
							|  |  |  |  |                         'wb') as f: | 
					
						
							|  |  |  |  |                     f.write(file.read()) | 
					
						
							|  |  |  |  |         os.remove(tmp_path) | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  | def parse_args(mMain=True, add_help=True): | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |     import argparse | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |     def str2bool(v): | 
					
						
							|  |  |  |  |         return v.lower() in ("true", "t", "1") | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |     if mMain: | 
					
						
							|  |  |  |  |         parser = argparse.ArgumentParser(add_help=add_help) | 
					
						
							|  |  |  |  |         # params for prediction engine | 
					
						
							|  |  |  |  |         parser.add_argument("--use_gpu", type=str2bool, default=True) | 
					
						
							|  |  |  |  |         parser.add_argument("--ir_optim", type=str2bool, default=True) | 
					
						
							|  |  |  |  |         parser.add_argument("--use_tensorrt", type=str2bool, default=False) | 
					
						
							|  |  |  |  |         parser.add_argument("--gpu_mem", type=int, default=8000) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # params for text detector | 
					
						
							|  |  |  |  |         parser.add_argument("--image_dir", type=str) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_algorithm", type=str, default='DB') | 
					
						
							|  |  |  |  |         parser.add_argument("--det_model_dir", type=str, default=None) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_limit_side_len", type=float, default=960) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_limit_type", type=str, default='max') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # DB parmas | 
					
						
							|  |  |  |  |         parser.add_argument("--det_db_thresh", type=float, default=0.3) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_db_box_thresh", type=float, default=0.5) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # EAST parmas | 
					
						
							|  |  |  |  |         parser.add_argument("--det_east_score_thresh", type=float, default=0.8) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) | 
					
						
							|  |  |  |  |         parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # params for text recognizer | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_algorithm", type=str, default='CRNN') | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_model_dir", type=str, default=None) | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_char_type", type=str, default='ch') | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_batch_num", type=int, default=30) | 
					
						
							|  |  |  |  |         parser.add_argument("--max_text_length", type=int, default=25) | 
					
						
							|  |  |  |  |         parser.add_argument("--rec_char_dict_path", type=str, default=None) | 
					
						
							|  |  |  |  |         parser.add_argument("--use_space_char", type=bool, default=True) | 
					
						
							|  |  |  |  |         parser.add_argument("--drop_score", type=float, default=0.5) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # params for text classifier | 
					
						
							|  |  |  |  |         parser.add_argument("--cls_model_dir", type=str, default=None) | 
					
						
							|  |  |  |  |         parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") | 
					
						
							|  |  |  |  |         parser.add_argument("--label_list", type=list, default=['0', '180']) | 
					
						
							|  |  |  |  |         parser.add_argument("--cls_batch_num", type=int, default=30) | 
					
						
							|  |  |  |  |         parser.add_argument("--cls_thresh", type=float, default=0.9) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         parser.add_argument("--enable_mkldnn", type=bool, default=False) | 
					
						
							|  |  |  |  |         parser.add_argument("--use_zero_copy_run", type=bool, default=False) | 
					
						
							|  |  |  |  |         parser.add_argument("--use_pdserving", type=str2bool, default=False) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         parser.add_argument("--lang", type=str, default='ch') | 
					
						
							|  |  |  |  |         parser.add_argument("--det", type=str2bool, default=True) | 
					
						
							|  |  |  |  |         parser.add_argument("--rec", type=str2bool, default=True) | 
					
						
							|  |  |  |  |         parser.add_argument("--use_angle_cls", type=str2bool, default=False) | 
					
						
							|  |  |  |  |         return parser.parse_args() | 
					
						
							|  |  |  |  |     else: | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |         return argparse.Namespace( | 
					
						
							|  |  |  |  |             use_gpu=True, | 
					
						
							|  |  |  |  |             ir_optim=True, | 
					
						
							|  |  |  |  |             use_tensorrt=False, | 
					
						
							|  |  |  |  |             gpu_mem=8000, | 
					
						
							|  |  |  |  |             image_dir='', | 
					
						
							|  |  |  |  |             det_algorithm='DB', | 
					
						
							|  |  |  |  |             det_model_dir=None, | 
					
						
							|  |  |  |  |             det_limit_side_len=960, | 
					
						
							|  |  |  |  |             det_limit_type='max', | 
					
						
							|  |  |  |  |             det_db_thresh=0.3, | 
					
						
							|  |  |  |  |             det_db_box_thresh=0.5, | 
					
						
							|  |  |  |  |             det_db_unclip_ratio=2.0, | 
					
						
							|  |  |  |  |             det_east_score_thresh=0.8, | 
					
						
							|  |  |  |  |             det_east_cover_thresh=0.1, | 
					
						
							|  |  |  |  |             det_east_nms_thresh=0.2, | 
					
						
							|  |  |  |  |             rec_algorithm='CRNN', | 
					
						
							|  |  |  |  |             rec_model_dir=None, | 
					
						
							|  |  |  |  |             rec_image_shape="3, 32, 320", | 
					
						
							|  |  |  |  |             rec_char_type='ch', | 
					
						
							|  |  |  |  |             rec_batch_num=30, | 
					
						
							|  |  |  |  |             max_text_length=25, | 
					
						
							|  |  |  |  |             rec_char_dict_path=None, | 
					
						
							|  |  |  |  |             use_space_char=True, | 
					
						
							|  |  |  |  |             drop_score=0.5, | 
					
						
							|  |  |  |  |             cls_model_dir=None, | 
					
						
							|  |  |  |  |             cls_image_shape="3, 48, 192", | 
					
						
							|  |  |  |  |             label_list=['0', '180'], | 
					
						
							|  |  |  |  |             cls_batch_num=30, | 
					
						
							|  |  |  |  |             cls_thresh=0.9, | 
					
						
							|  |  |  |  |             enable_mkldnn=False, | 
					
						
							|  |  |  |  |             use_zero_copy_run=False, | 
					
						
							|  |  |  |  |             use_pdserving=False, | 
					
						
							|  |  |  |  |             lang='ch', | 
					
						
							|  |  |  |  |             det=True, | 
					
						
							|  |  |  |  |             rec=True, | 
					
						
							|  |  |  |  |             use_angle_cls=False) | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class PaddleOCR(predict_system.TextSystem): | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |     def __init__(self, **kwargs): | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         """
 | 
					
						
							|  |  |  |  |         paddleocr package | 
					
						
							|  |  |  |  |         args: | 
					
						
							|  |  |  |  |             **kwargs: other params show in paddleocr --help | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         postprocess_params = parse_args(mMain=False, add_help=False) | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |         postprocess_params.__dict__.update(**kwargs) | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         self.use_angle_cls = postprocess_params.use_angle_cls | 
					
						
							|  |  |  |  |         lang = postprocess_params.lang | 
					
						
							|  |  |  |  |         assert lang in model_urls[ | 
					
						
							|  |  |  |  |             'rec'], 'param lang must in {}, but got {}'.format( | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |                 model_urls['rec'].keys(), lang) | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         if postprocess_params.rec_char_dict_path is None: | 
					
						
							|  |  |  |  |             postprocess_params.rec_char_dict_path = model_urls['rec'][lang][ | 
					
						
							|  |  |  |  |                 'dict_path'] | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |         # init model dir | 
					
						
							|  |  |  |  |         if postprocess_params.det_model_dir is None: | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |             postprocess_params.det_model_dir = os.path.join( | 
					
						
							|  |  |  |  |                 BASE_DIR, '{}/det'.format(VERSION)) | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |         if postprocess_params.rec_model_dir is None: | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |             postprocess_params.rec_model_dir = os.path.join( | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |                 BASE_DIR, '{}/rec/{}'.format(VERSION, lang)) | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         if postprocess_params.cls_model_dir is None: | 
					
						
							| 
									
										
										
										
											2020-12-11 22:06:42 +08:00
										 |  |  |  |             postprocess_params.cls_model_dir = os.path.join( | 
					
						
							|  |  |  |  |                 BASE_DIR, '{}/cls'.format(VERSION)) | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |         print(postprocess_params) | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         # download model | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         maybe_download(postprocess_params.det_model_dir, model_urls['det']) | 
					
						
							|  |  |  |  |         maybe_download(postprocess_params.rec_model_dir, | 
					
						
							|  |  |  |  |                        model_urls['rec'][lang]['url']) | 
					
						
							|  |  |  |  |         maybe_download(postprocess_params.cls_model_dir, model_urls['cls']) | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |         if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL: | 
					
						
							|  |  |  |  |             logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL)) | 
					
						
							|  |  |  |  |             sys.exit(0) | 
					
						
							|  |  |  |  |         if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL: | 
					
						
							|  |  |  |  |             logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL)) | 
					
						
							|  |  |  |  |             sys.exit(0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         postprocess_params.rec_char_dict_path = Path( | 
					
						
							|  |  |  |  |             __file__).parent / postprocess_params.rec_char_dict_path | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         # init det_model and rec_model | 
					
						
							|  |  |  |  |         super().__init__(postprocess_params) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |     def ocr(self, img, det=True, rec=True, cls=False): | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         """
 | 
					
						
							|  |  |  |  |         ocr with paddleocr | 
					
						
							|  |  |  |  |         args: | 
					
						
							|  |  |  |  |             img: img for ocr, support ndarray, img_path and list or ndarray | 
					
						
							|  |  |  |  |             det: use text detection or not, if false, only rec will be exec. default is True | 
					
						
							|  |  |  |  |             rec: use text recognition or not, if false, only det will be exec. default is True | 
					
						
							|  |  |  |  |         """
 | 
					
						
							|  |  |  |  |         assert isinstance(img, (np.ndarray, list, str)) | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         if isinstance(img, list) and det == True: | 
					
						
							|  |  |  |  |             logger.error('When input a list of images, det must be false') | 
					
						
							|  |  |  |  |             exit(0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         self.use_angle_cls = cls | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         if isinstance(img, str): | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |             # download net image | 
					
						
							|  |  |  |  |             if img.startswith('http'): | 
					
						
							|  |  |  |  |                 download_with_progressbar(img, 'tmp.jpg') | 
					
						
							|  |  |  |  |                 img = 'tmp.jpg' | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |             image_file = img | 
					
						
							|  |  |  |  |             img, flag = check_and_read_gif(image_file) | 
					
						
							|  |  |  |  |             if not flag: | 
					
						
							|  |  |  |  |                 img = cv2.imread(image_file) | 
					
						
							|  |  |  |  |             if img is None: | 
					
						
							|  |  |  |  |                 logger.error("error in loading image:{}".format(image_file)) | 
					
						
							|  |  |  |  |                 return None | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         if isinstance(img, np.ndarray) and len(img.shape) == 2: | 
					
						
							|  |  |  |  |             img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |         if det and rec: | 
					
						
							|  |  |  |  |             dt_boxes, rec_res = self.__call__(img) | 
					
						
							|  |  |  |  |             return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] | 
					
						
							|  |  |  |  |         elif det and not rec: | 
					
						
							|  |  |  |  |             dt_boxes, elapse = self.text_detector(img) | 
					
						
							|  |  |  |  |             if dt_boxes is None: | 
					
						
							|  |  |  |  |                 return None | 
					
						
							|  |  |  |  |             return [box.tolist() for box in dt_boxes] | 
					
						
							|  |  |  |  |         else: | 
					
						
							|  |  |  |  |             if not isinstance(img, list): | 
					
						
							|  |  |  |  |                 img = [img] | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |             if self.use_angle_cls: | 
					
						
							|  |  |  |  |                 img, cls_res, elapse = self.text_classifier(img) | 
					
						
							|  |  |  |  |                 if not rec: | 
					
						
							|  |  |  |  |                     return cls_res | 
					
						
							| 
									
										
										
										
											2020-08-22 19:42:14 +08:00
										 |  |  |  |             rec_res, elapse = self.text_recognizer(img) | 
					
						
							|  |  |  |  |             return rec_res | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | def main(): | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |     # for cmd | 
					
						
							|  |  |  |  |     args = parse_args(mMain=True) | 
					
						
							|  |  |  |  |     image_dir = args.image_dir | 
					
						
							|  |  |  |  |     if image_dir.startswith('http'): | 
					
						
							|  |  |  |  |         download_with_progressbar(image_dir, 'tmp.jpg') | 
					
						
							|  |  |  |  |         image_file_list = ['tmp.jpg'] | 
					
						
							|  |  |  |  |     else: | 
					
						
							|  |  |  |  |         image_file_list = get_image_file_list(args.image_dir) | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |     if len(image_file_list) == 0: | 
					
						
							|  |  |  |  |         logger.error('no images find in {}'.format(args.image_dir)) | 
					
						
							|  |  |  |  |         return | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     ocr_engine = PaddleOCR(**(args.__dict__)) | 
					
						
							| 
									
										
										
										
											2020-08-24 11:30:56 +08:00
										 |  |  |  |     for img_path in image_file_list: | 
					
						
							| 
									
										
										
										
											2020-12-07 19:10:19 +08:00
										 |  |  |  |         logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10)) | 
					
						
							|  |  |  |  |         result = ocr_engine.ocr(img_path, | 
					
						
							|  |  |  |  |                                 det=args.det, | 
					
						
							|  |  |  |  |                                 rec=args.rec, | 
					
						
							|  |  |  |  |                                 cls=args.use_angle_cls) | 
					
						
							|  |  |  |  |         if result is not None: | 
					
						
							|  |  |  |  |             for line in result: | 
					
						
							|  |  |  |  |                 logger.info(line) |