mirror of
				https://github.com/PaddlePaddle/PaddleOCR.git
				synced 2025-10-31 09:49:30 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			222 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			222 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| import subprocess
 | |
| 
 | |
| __dir__ = os.path.dirname(os.path.abspath(__file__))
 | |
| sys.path.append(__dir__)
 | |
| sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 | |
| sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
 | |
| 
 | |
| os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
 | |
| import cv2
 | |
| import copy
 | |
| import numpy as np
 | |
| import time
 | |
| import tools.infer.predict_rec as predict_rec
 | |
| import tools.infer.predict_det as predict_det
 | |
| from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 | |
| from ppocr.utils.logging import get_logger
 | |
| from ppstructure.table.matcher import distance, compute_iou
 | |
| from ppstructure.utility import parse_args
 | |
| import ppstructure.table.predict_structure as predict_strture
 | |
| 
 | |
| logger = get_logger()
 | |
| 
 | |
| 
 | |
def expand(pix, det_box, shape):
    """Grow a box by ``pix`` pixels on every side, clamped to the image.

    Args:
        pix: margin added to each side of the box.
        det_box: box given as ``(x0, y0, x1, y1)``.
        shape: image shape whose first two entries are ``(height, width)``;
            any trailing entries (e.g. the channel count) are ignored, so
            both 2-D and 3-D image shapes are accepted.

    Returns:
        Tuple ``(x0, y0, x1, y1)`` of the grown box. The top-left corner is
        clamped to be >= 0 and the bottom-right corner is clamped to be
        <= ``(width, height)``.
    """
    x0, y0, x1, y1 = det_box
    # Only height and width matter here; slicing keeps grayscale shapes valid.
    h, w = shape[:2]
    # The lower bound is applied to the top-left corner only and the upper
    # bound to the bottom-right corner only.
    x0_ = max(x0 - pix, 0)
    y0_ = max(y0 - pix, 0)
    x1_ = min(x1 + pix, w)
    y1_ = min(y1 + pix, h)
    return x0_, y0_, x1_, y1_
 | |
| 
 | |
| 
 | |
class TableSystem(object):
    """Table recognition pipeline producing HTML from a table image.

    Combines three predictors: a table-structure predictor, a text detector
    and a text recognizer. Detected text boxes are matched to the predicted
    table cells and the recognized text is spliced into the predicted
    structure tokens.
    """

    def __init__(self, args, text_detector=None, text_recognizer=None):
        """Build the pipeline.

        Args:
            args: parsed options forwarded to every predictor constructed here.
            text_detector: optional ready-made detector; a
                ``predict_det.TextDetector(args)`` is created when ``None``.
            text_recognizer: optional ready-made recognizer; a
                ``predict_rec.TextRecognizer(args)`` is created when ``None``.
        """
        self.text_detector = predict_det.TextDetector(args) if text_detector is None else text_detector
        self.text_recognizer = predict_rec.TextRecognizer(args) if text_recognizer is None else text_recognizer
        self.table_structurer = predict_strture.TableStructurer(args)

    def __call__(self, img):
        """Run the full pipeline on one image.

        Args:
            img: image array indexed as ``[row, column, channel]``
                (assumed to be what ``cv2.imread`` returns -- TODO confirm).

        Returns:
            The predicted table as an HTML string, or ``None`` when the text
            detector returns no boxes object at all.
        """
        ori_im = img.copy()
        # Each predictor gets its own copy so neither can alter the pixels
        # that are cropped from `ori_im` below.
        structure_res, _ = self.table_structurer(copy.deepcopy(img))
        dt_boxes, elapse = self.text_detector(copy.deepcopy(img))
        # This guard must run before the boxes are sorted/converted: once
        # wrapped by np.array() the value can never be None again.
        if dt_boxes is None:
            return None
        dt_boxes = sorted_boxes(dt_boxes)

        # Collapse each detected polygon to an axis-aligned
        # [x_min, y_min, x_max, y_max] rectangle padded by one pixel.
        dt_boxes = np.array([
            [box[:, 0].min() - 1, box[:, 1].min() - 1,
             box[:, 0].max() + 1, box[:, 1].max() + 1]
            for box in dt_boxes
        ])

        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))

        img_crop_list = []
        for det_box in dt_boxes:
            x0, y0, x1, y1 = expand(2, det_box, ori_im.shape)
            img_crop_list.append(ori_im[int(y0):int(y1), int(x0):int(x1), :])
        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))

        pred_html, _ = self.rebuild_table(structure_res, dt_boxes, rec_res)
        return pred_html

    def rebuild_table(self, structure_res, dt_boxes, rec_res):
        """Merge structure prediction, text boxes and recognized text.

        Args:
            structure_res: pair ``(pred_structures, pred_bboxes)`` from the
                structure predictor.
            dt_boxes: text boxes, one ``[x_min, y_min, x_max, y_max]`` each.
            rec_res: recognition results aligned with ``dt_boxes``.

        Returns:
            ``(pred_html, pred)`` exactly as returned by ``get_pred_html``.
        """
        pred_structures, pred_bboxes = structure_res
        matched_index = self.match_result(dt_boxes, pred_bboxes)
        pred_html, pred = self.get_pred_html(pred_structures, matched_index, rec_res)
        return pred_html, pred

    def match_result(self, dt_boxes, pred_bboxes):
        """Assign every text box to its closest predicted cell.

        Closeness is ranked by ``1 - compute_iou`` first and by ``distance``
        second (per the original comment, an L1 distance -- TODO confirm
        against ``ppstructure.table.matcher``). Ties go to the earliest cell.

        Args:
            dt_boxes: iterable of text boxes.
            pred_bboxes: sequence of predicted cell boxes.

        Returns:
            Dict mapping a cell index to the list of text-box indices assigned
            to it, in text-box order. Empty when there are no predicted cells.
        """
        matched = {}
        # With no cells there is nothing to match against; the previous code
        # raised IndexError here.
        if len(pred_bboxes) == 0:
            return matched
        for i, gt_box in enumerate(dt_boxes):
            # One (1 - IoU, distance) cost per cell; tuple order is the
            # ranking order.
            costs = [
                (1. - compute_iou(gt_box, pred_box), distance(gt_box, pred_box))
                for pred_box in pred_bboxes
            ]
            # min() returns the first minimal index, matching the former
            # stable-sort + list.index lookup.
            best_cell = min(range(len(costs)), key=costs.__getitem__)
            matched.setdefault(best_cell, []).append(i)
        return matched

    def get_pred_html(self, pred_structures, matched_index, ocr_contents):
        """Insert recognized text into the predicted structure tokens.

        Args:
            pred_structures: iterable of structure tokens; every token that
                contains ``'</td>'`` closes one cell, counted from 0.
            matched_index: dict from cell index to the indices of the
                recognition results belonging to that cell.
            ocr_contents: sequence of recognition results whose element 0 is
                the recognized text.

        Returns:
            ``(html, pieces)`` where ``pieces`` is the list of structure
            tokens and individual text characters, and ``html`` is their
            concatenation.
        """
        end_html = []
        td_index = 0
        for tag in pred_structures:
            if '</td>' not in tag:
                end_html.append(tag)
                continue

            if td_index in matched_index:
                box_ids = matched_index[td_index]
                multiple = len(box_ids) > 1
                # Look for the tag inside the recognized *text*; testing the
                # whole result tuple only matched a text equal to '<b>'.
                b_with = multiple and '<b>' in ocr_contents[box_ids[0]][0]
                if b_with:
                    end_html.extend('<b>')
                for i, box_id in enumerate(box_ids):
                    content = ocr_contents[box_id][0]
                    if multiple:
                        if len(content) == 0:
                            continue
                        if content[0] == ' ':
                            content = content[1:]
                        # Drop per-fragment bold tags wherever they occur;
                        # the cell is wrapped once as a whole when b_with.
                        content = content.replace('<b>', '').replace('</b>', '')
                        if len(content) == 0:
                            continue
                        # Separate fragments with exactly one space.
                        if i != len(box_ids) - 1 and ' ' != content[-1]:
                            content += ' '
                    # extend() on a str appends one character per entry.
                    end_html.extend(content)
                if b_with:
                    end_html.extend('</b>')

            end_html.append(tag)
            td_index += 1
        return ''.join(end_html), end_html
 | |
| 
 | |
| 
 | |
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array or list): detected text boxes, N boxes of shape [4, 2];
            only the first point (box[0]) of each box is used for ordering
    return:
        list of the same N boxes in reading order
    """
    num_boxes = len(dt_boxes)
    # Primary order: first point's y, then its x.
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    # Boxes whose first points differ by less than 10 in y are treated as
    # the same row and ordered by x. Each newly reached box is bubbled
    # backwards as far as needed; a single adjacent-swap pass would leave
    # rows of three or more boxes partially unordered.
    for i in range(num_boxes - 1):
        for j in range(i, -1, -1):
            same_row = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_row and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes
 | |
| 
 | |
| 
 | |
def to_excel(html_table, excel_path):
    """Hand ``html_table`` and ``excel_path`` to tablepyxl's ``document_to_xl``.

    Args:
        html_table: HTML passed through unchanged as the first argument.
        excel_path: destination passed through unchanged as the second argument.
    """
    # Function-scope import: tablepyxl is only required once a table is exported.
    from tablepyxl.tablepyxl import document_to_xl

    document_to_xl(html_table, excel_path)
 | |
| 
 | |
| 
 | |
def main(args):
    """Recognize every table image assigned to this process and export it.

    The image list from ``args.image_dir`` is strided by ``args.process_id``
    and ``args.total_process_num``. Each readable image is run through
    ``TableSystem`` and the resulting HTML is written to an ``.xlsx`` file in
    ``args.output`` named after the image.

    Args:
        args: parsed options; reads ``image_dir``, ``process_id``,
            ``total_process_num`` and ``output``, and is forwarded to
            ``TableSystem``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    os.makedirs(args.output, exist_ok=True)

    text_sys = TableSystem(args)
    img_num = len(image_file_list)
    for i, image_file in enumerate(image_file_list):
        logger.info("[{}/{}] {}".format(i, img_num, image_file))
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            continue

        # splitext removes only the final extension, so "a.v1.png" and
        # "a.v2.png" no longer both map to "a.xlsx".
        stem = os.path.splitext(os.path.basename(image_file))[0]
        excel_path = os.path.join(args.output, stem + '.xlsx')

        starttime = time.time()
        pred_html = text_sys(img)
        if pred_html is None:
            # Nothing to export for this image.
            logger.error("no table result for image:{}".format(image_file))
            continue

        to_excel(pred_html, excel_path)
        logger.info('excel saved to {}'.format(excel_path))
        logger.info(pred_html)
        elapse = time.time() - starttime
        logger.info("Predict time : {:.3f}s".format(elapse))
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    args = parse_args()
    if not args.use_mp:
        main(args)
    else:
        # Re-launch this script once per process id with multiprocessing
        # switched off in the child. All children are started first and then
        # awaited; their stdout and stderr both go to this process's stdout.
        workers = [
            subprocess.Popen(
                [sys.executable, "-u"] + sys.argv + [
                    "--process_id={}".format(worker_id),
                    "--use_mp={}".format(False)
                ],
                stdout=sys.stdout,
                stderr=sys.stdout)
            for worker_id in range(args.total_process_num)
        ]
        for worker in workers:
            worker.wait()
 | 
