| 
									
										
										
										
											2022-02-13 11:48:18 +08:00
										 |  |  | # copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | # you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | # You may obtain a copy of the License at | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #    http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | # distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | # See the License for the specific language governing permissions and | 
					
						
							|  |  |  | # limitations under the License. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | This code is adapted from: | 
					
						
							|  |  |  | https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2022-01-27 17:36:19 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | import cv2 | 
					
						
							|  |  |  | import paddle | 
					
						
							| 
									
										
										
										
											2022-02-13 11:48:18 +08:00
										 |  |  | import numpy as np | 
					
						
							| 
									
										
										
										
											2022-01-27 17:36:19 +08:00
										 |  |  | from numpy.fft import ifft | 
					
						
							| 
									
										
										
										
											2022-02-13 11:48:18 +08:00
										 |  |  | from ppocr.utils.poly_nms import poly_nms, valid_boundary | 
					
						
							| 
									
										
										
										
											2022-01-27 17:36:19 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fill_hole(input_mask): | 
					
						
							|  |  |  |     h, w = input_mask.shape | 
					
						
							|  |  |  |     canvas = np.zeros((h + 2, w + 2), np.uint8) | 
					
						
							|  |  |  |     canvas[1:h + 1, 1:w + 1] = input_mask.copy() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     mask = np.zeros((h + 4, w + 4), np.uint8) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cv2.floodFill(canvas, mask, (0, 0), 1) | 
					
						
							|  |  |  |     canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ~canvas | input_mask | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fourier2poly(fourier_coeff, num_reconstr_points=50): | 
					
						
							|  |  |  |     """ Inverse Fourier transform
 | 
					
						
							|  |  |  |         Args: | 
					
						
							|  |  |  |             fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1), | 
					
						
							|  |  |  |                 with n and k being candidates number and Fourier degree | 
					
						
							|  |  |  |                 respectively. | 
					
						
							|  |  |  |             num_reconstr_points (int): Number of reconstructed polygon points. | 
					
						
							|  |  |  |         Returns: | 
					
						
							|  |  |  |             Polygons (ndarray): The reconstructed polygons shaped (n, n') | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     a = np.zeros((len(fourier_coeff), num_reconstr_points), dtype='complex') | 
					
						
							|  |  |  |     k = (len(fourier_coeff[0]) - 1) // 2 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     a[:, 0:k + 1] = fourier_coeff[:, k:] | 
					
						
							|  |  |  |     a[:, -k:] = fourier_coeff[:, :k] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     poly_complex = ifft(a) * num_reconstr_points | 
					
						
							|  |  |  |     polygon = np.zeros((len(fourier_coeff), num_reconstr_points, 2)) | 
					
						
							|  |  |  |     polygon[:, :, 0] = poly_complex.real | 
					
						
							|  |  |  |     polygon[:, :, 1] = poly_complex.imag | 
					
						
							|  |  |  |     return polygon.astype('int32').reshape((len(fourier_coeff), -1)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class FCEPostProcess(object): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     The post process for FCENet. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, | 
					
						
							|  |  |  |                  scales, | 
					
						
							|  |  |  |                  fourier_degree=5, | 
					
						
							|  |  |  |                  num_reconstr_points=50, | 
					
						
							|  |  |  |                  decoding_type='fcenet', | 
					
						
							|  |  |  |                  score_thr=0.3, | 
					
						
							|  |  |  |                  nms_thr=0.1, | 
					
						
							|  |  |  |                  alpha=1.0, | 
					
						
							|  |  |  |                  beta=1.0, | 
					
						
							|  |  |  |                  text_repr_type='poly', | 
					
						
							|  |  |  |                  **kwargs): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.scales = scales | 
					
						
							|  |  |  |         self.fourier_degree = fourier_degree | 
					
						
							|  |  |  |         self.num_reconstr_points = num_reconstr_points | 
					
						
							|  |  |  |         self.decoding_type = decoding_type | 
					
						
							|  |  |  |         self.score_thr = score_thr | 
					
						
							|  |  |  |         self.nms_thr = nms_thr | 
					
						
							|  |  |  |         self.alpha = alpha | 
					
						
							|  |  |  |         self.beta = beta | 
					
						
							|  |  |  |         self.text_repr_type = text_repr_type | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __call__(self, preds, shape_list): | 
					
						
							|  |  |  |         score_maps = [] | 
					
						
							|  |  |  |         for key, value in preds.items(): | 
					
						
							|  |  |  |             if isinstance(value, paddle.Tensor): | 
					
						
							|  |  |  |                 value = value.numpy() | 
					
						
							|  |  |  |             cls_res = value[:, :4, :, :] | 
					
						
							|  |  |  |             reg_res = value[:, 4:, :, :] | 
					
						
							|  |  |  |             score_maps.append([cls_res, reg_res]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.get_boundary(score_maps, shape_list) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def resize_boundary(self, boundaries, scale_factor): | 
					
						
							|  |  |  |         """Rescale boundaries via scale_factor.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Args: | 
					
						
							|  |  |  |             boundaries (list[list[float]]): The boundary list. Each boundary | 
					
						
							|  |  |  |             with size 2k+1 with k>=4. | 
					
						
							|  |  |  |             scale_factor(ndarray): The scale factor of size (4,). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Returns: | 
					
						
							|  |  |  |             boundaries (list[list[float]]): The scaled boundaries. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         boxes = [] | 
					
						
							|  |  |  |         scores = [] | 
					
						
							|  |  |  |         for b in boundaries: | 
					
						
							|  |  |  |             sz = len(b) | 
					
						
							|  |  |  |             valid_boundary(b, True) | 
					
						
							|  |  |  |             scores.append(b[-1]) | 
					
						
							|  |  |  |             b = (np.array(b[:sz - 1]) * | 
					
						
							|  |  |  |                  (np.tile(scale_factor[:2], int( | 
					
						
							|  |  |  |                      (sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist() | 
					
						
							|  |  |  |             boxes.append(np.array(b).reshape([-1, 2])) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return np.array(boxes, dtype=np.float32), scores | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_boundary(self, score_maps, shape_list): | 
					
						
							|  |  |  |         assert len(score_maps) == len(self.scales) | 
					
						
							|  |  |  |         boundaries = [] | 
					
						
							|  |  |  |         for idx, score_map in enumerate(score_maps): | 
					
						
							|  |  |  |             scale = self.scales[idx] | 
					
						
							|  |  |  |             boundaries = boundaries + self._get_boundary_single(score_map, | 
					
						
							|  |  |  |                                                                 scale) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # nms | 
					
						
							|  |  |  |         boundaries = poly_nms(boundaries, self.nms_thr) | 
					
						
							|  |  |  |         boundaries, scores = self.resize_boundary( | 
					
						
							|  |  |  |             boundaries, (1 / shape_list[0, 2:]).tolist()[::-1]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         boxes_batch = [dict(points=boundaries, scores=scores)] | 
					
						
							|  |  |  |         return boxes_batch | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _get_boundary_single(self, score_map, scale): | 
					
						
							|  |  |  |         assert len(score_map) == 2 | 
					
						
							|  |  |  |         assert score_map[1].shape[1] == 4 * self.fourier_degree + 2 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-13 11:48:18 +08:00
										 |  |  |         return self.fcenet_decode( | 
					
						
							| 
									
										
										
										
											2022-01-27 17:36:19 +08:00
										 |  |  |             preds=score_map, | 
					
						
							|  |  |  |             fourier_degree=self.fourier_degree, | 
					
						
							|  |  |  |             num_reconstr_points=self.num_reconstr_points, | 
					
						
							|  |  |  |             scale=scale, | 
					
						
							|  |  |  |             alpha=self.alpha, | 
					
						
							|  |  |  |             beta=self.beta, | 
					
						
							|  |  |  |             text_repr_type=self.text_repr_type, | 
					
						
							|  |  |  |             score_thr=self.score_thr, | 
					
						
							|  |  |  |             nms_thr=self.nms_thr) | 
					
						
							| 
									
										
										
										
											2022-02-13 11:48:18 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def fcenet_decode(self, | 
					
						
							|  |  |  |                       preds, | 
					
						
							|  |  |  |                       fourier_degree, | 
					
						
							|  |  |  |                       num_reconstr_points, | 
					
						
							|  |  |  |                       scale, | 
					
						
							|  |  |  |                       alpha=1.0, | 
					
						
							|  |  |  |                       beta=2.0, | 
					
						
							|  |  |  |                       text_repr_type='poly', | 
					
						
							|  |  |  |                       score_thr=0.3, | 
					
						
							|  |  |  |                       nms_thr=0.1): | 
					
						
							|  |  |  |         """Decoding predictions of FCENet to instances.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Args: | 
					
						
							|  |  |  |             preds (list(Tensor)): The head output tensors. | 
					
						
							|  |  |  |             fourier_degree (int): The maximum Fourier transform degree k. | 
					
						
							|  |  |  |             num_reconstr_points (int): The points number of the polygon | 
					
						
							|  |  |  |                 reconstructed from predicted Fourier coefficients. | 
					
						
							|  |  |  |             scale (int): The down-sample scale of the prediction. | 
					
						
							|  |  |  |             alpha (float) : The parameter to calculate final scores. Score_{final} | 
					
						
							|  |  |  |                     = (Score_{text region} ^ alpha) | 
					
						
							|  |  |  |                     * (Score_{text center region}^ beta) | 
					
						
							|  |  |  |             beta (float) : The parameter to calculate final score. | 
					
						
							|  |  |  |             text_repr_type (str):  Boundary encoding type 'poly' or 'quad'. | 
					
						
							|  |  |  |             score_thr (float) : The threshold used to filter out the final | 
					
						
							|  |  |  |                 candidates. | 
					
						
							|  |  |  |             nms_thr (float) :  The threshold of nms. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Returns: | 
					
						
							|  |  |  |             boundaries (list[list[float]]): The instance boundary and confidence | 
					
						
							|  |  |  |                 list. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         assert isinstance(preds, list) | 
					
						
							|  |  |  |         assert len(preds) == 2 | 
					
						
							|  |  |  |         assert text_repr_type in ['poly', 'quad'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         cls_pred = preds[0][0] | 
					
						
							|  |  |  |         tr_pred = cls_pred[0:2] | 
					
						
							|  |  |  |         tcl_pred = cls_pred[2:] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         reg_pred = preds[1][0].transpose([1, 2, 0]) | 
					
						
							|  |  |  |         x_pred = reg_pred[:, :, :2 * fourier_degree + 1] | 
					
						
							|  |  |  |         y_pred = reg_pred[:, :, 2 * fourier_degree + 1:] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta) | 
					
						
							|  |  |  |         tr_pred_mask = (score_pred) > score_thr | 
					
						
							|  |  |  |         tr_mask = fill_hole(tr_pred_mask) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         tr_contours, _ = cv2.findContours( | 
					
						
							|  |  |  |             tr_mask.astype(np.uint8), cv2.RETR_TREE, | 
					
						
							|  |  |  |             cv2.CHAIN_APPROX_SIMPLE)  # opencv4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         mask = np.zeros_like(tr_mask) | 
					
						
							|  |  |  |         boundaries = [] | 
					
						
							|  |  |  |         for cont in tr_contours: | 
					
						
							|  |  |  |             deal_map = mask.copy().astype(np.int8) | 
					
						
							|  |  |  |             cv2.drawContours(deal_map, [cont], -1, 1, -1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             score_map = score_pred * deal_map | 
					
						
							|  |  |  |             score_mask = score_map > 0 | 
					
						
							|  |  |  |             xy_text = np.argwhere(score_mask) | 
					
						
							|  |  |  |             dxy = xy_text[:, 1] + xy_text[:, 0] * 1j | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             x, y = x_pred[score_mask], y_pred[score_mask] | 
					
						
							|  |  |  |             c = x + y * 1j | 
					
						
							|  |  |  |             c[:, fourier_degree] = c[:, fourier_degree] + dxy | 
					
						
							|  |  |  |             c *= scale | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             polygons = fourier2poly(c, num_reconstr_points) | 
					
						
							|  |  |  |             score = score_map[score_mask].reshape(-1, 1) | 
					
						
							|  |  |  |             polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             boundaries = boundaries + polygons | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         boundaries = poly_nms(boundaries, nms_thr) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if text_repr_type == 'quad': | 
					
						
							|  |  |  |             new_boundaries = [] | 
					
						
							|  |  |  |             for boundary in boundaries: | 
					
						
							|  |  |  |                 poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32) | 
					
						
							|  |  |  |                 score = boundary[-1] | 
					
						
							|  |  |  |                 points = cv2.boxPoints(cv2.minAreaRect(poly)) | 
					
						
							|  |  |  |                 points = np.int0(points) | 
					
						
							|  |  |  |                 new_boundaries.append(points.reshape(-1).tolist() + [score]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return boundaries |