PaddleOCR/ppocr/postprocess/fce_postprocess.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2
import paddle
from numpy.fft import ifft
import Polygon as plg


def points2polygon(points):
    """Build a single polygon object from a flat run of coordinates.

    Args:
        points (ndarray or list): 2k values describing k points; the values
            are reshaped to (k, 2), so each consecutive pair is one point.
            At least 8 values (4 points) are required.

    Returns:
        polygon (Polygon): The polygon built from the k points.
    """
    coords = np.array(points) if isinstance(points, list) else points

    assert isinstance(coords, np.ndarray)
    assert coords.size >= 8 and coords.size % 2 == 0

    return plg.Polygon(coords.reshape([-1, 2]))


def poly_intersection(poly_det, poly_gt):
    """Intersect two polygons and report the overlapping area.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        tuple: ``(intersection_area, poly_inter)`` where ``poly_inter`` is
            the result of ``poly_det & poly_gt`` and ``intersection_area``
            is its area, or 0 when the intersection has no contours.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    overlap = poly_det & poly_gt
    overlap_area = overlap.area() if len(overlap) != 0 else 0
    return overlap_area, overlap


def poly_union(poly_det, poly_gt):
    """Compute the area covered by either of two polygons.

    The union is obtained by inclusion-exclusion: both individual areas are
    summed and the intersection area is subtracted once.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        union_area (float): The union area between two polygons.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    det_area, gt_area = poly_det.area(), poly_gt.area()
    overlap_area = poly_intersection(poly_det, poly_gt)[0]
    return det_area + gt_area - overlap_area


def valid_boundary(x, with_score=True):
    """Tell whether ``x`` has a plausible length for a boundary.

    Args:
        x (sequence): Flat boundary values.
        with_score (bool): Whether ``x`` is expected to carry one trailing
            score after the coordinates.

    Returns:
        bool: False when ``x`` has fewer than 8 values. Otherwise True when
            the length is odd and a score is expected, or even and no score
            is expected.
    """
    length = len(x)
    if length < 8:
        return False

    has_odd_length = length % 2 == 1
    return has_odd_length if with_score else not has_odd_length


def boundary_iou(src, target):
    """Compute the IOU of two boundaries given as flat coordinate lists.

    Both boundaries must be score-free (even length, at least 8 values);
    they are converted to polygons before the IOU is evaluated.

    Args:
       src (list): Source boundary.
       target (list): Target boundary.

    Returns:
       iou (float): The iou between two boundaries.
    """
    for boundary in (src, target):
        assert valid_boundary(boundary, False)

    return poly_iou(points2polygon(src), points2polygon(target))


def poly_iou(poly_det, poly_gt):
    """Compute intersection-over-union for two polygons.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        iou (float): The IOU between two polygons; 0.0 when the union area
            is zero.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    overlap_area = poly_intersection(poly_det, poly_gt)[0]
    union_area = poly_union(poly_det, poly_gt)
    return overlap_area / union_area if union_area != 0 else 0.0


def poly_nms(polygons, threshold):
    """Greedy non-maximum suppression over scored polygons.

    Args:
        polygons (list[list[float]]): Candidates; each entry is a flat
            coordinate list followed by its score as the last element.
        threshold (float): Candidates whose IOU with an already kept polygon
            exceeds this value are discarded.

    Returns:
        list[list[float]]: The kept candidates, highest score first.
    """
    assert isinstance(polygons, list)

    # Ascending by score, so the best remaining candidate is always last.
    polygons = np.array(sorted(polygons, key=lambda item: item[-1]))

    kept = []
    remaining = np.arange(polygons.shape[0])

    while len(remaining) > 0:
        best = remaining[-1]
        remaining = remaining[:-1]
        kept.append(polygons[best].tolist())
        best_coords = polygons[best][:-1]

        overlaps = np.array(
            [boundary_iou(best_coords, polygons[j][:-1]) for j in remaining])
        # Keep only the candidates that do not overlap the winner too much.
        remaining = remaining[~(overlaps > threshold)]

    return kept


def fill_hole(input_mask):
    """Fill the enclosed holes of a 2-D binary mask.

    The mask is embedded in a canvas with a one-pixel zero border and the
    canvas is flood-filled with 1 starting from its top-left corner. Every
    zero pixel connected to the border is overwritten, so the pixels that
    are still zero afterwards are the holes enclosed by foreground.

    Args:
        input_mask (ndarray): 2-D mask of shape (h, w). The caller in this
            file passes a boolean array.

    Returns:
        ndarray: ``input_mask`` OR-ed with the detected holes, shape (h, w).
    """
    h, w = input_mask.shape
    canvas = np.zeros((h + 2, w + 2), np.uint8)
    canvas[1:h + 1, 1:w + 1] = input_mask

    # cv2.floodFill needs a mask 2 pixels larger than the image on each axis.
    mask = np.zeros((h + 4, w + 4), np.uint8)

    cv2.floodFill(canvas, mask, (0, 0), 1)
    # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool` is
    # the supported spelling and yields the same dtype.
    canvas = canvas[1:h + 1, 1:w + 1].astype(bool)

    return ~canvas | input_mask


def fourier2poly(fourier_coeff, num_reconstr_points=50):
    """Reconstruct polygons from Fourier coefficients (inverse transform).

    Args:
        fourier_coeff (ndarray): Complex Fourier coefficients shaped
            (n, 2k+1), with n and k being candidates number and Fourier
            degree respectively. Column k holds the degree-0 coefficient,
            columns before it the negative degrees and columns after it
            the positive degrees.
        num_reconstr_points (int): Number of reconstructed polygon points.

    Returns:
        Polygons (ndarray): int32 array shaped (n, 2 * num_reconstr_points)
            holding interleaved x (real part) and y (imaginary part)
            coordinates. An input with zero candidates gives a
            (0, 2 * num_reconstr_points) array.
    """
    coeffs = np.asarray(fourier_coeff)
    num_polys, num_coeffs = coeffs.shape

    # No candidates: return an empty result instead of indexing row 0.
    if num_polys == 0:
        return np.zeros((0, 2 * num_reconstr_points), dtype='int32')

    k = (num_coeffs - 1) // 2

    # Lay the coefficients out in the order expected by ifft: degrees 0..k
    # at the front, negative degrees -k..-1 at the tail.
    spectrum = np.zeros((num_polys, num_reconstr_points), dtype='complex')
    spectrum[:, 0:k + 1] = coeffs[:, k:]
    if k > 0:
        # With k == 0 the slice `-k:` would select every column.
        spectrum[:, -k:] = coeffs[:, :k]

    # ifft divides by the number of points; undo that normalisation.
    poly_complex = ifft(spectrum) * num_reconstr_points
    polygon = np.zeros((num_polys, num_reconstr_points, 2))
    polygon[:, :, 0] = poly_complex.real
    polygon[:, :, 1] = poly_complex.imag
    return polygon.astype('int32').reshape((num_polys, -1))


def fcenet_decode(preds,
                  fourier_degree,
                  num_reconstr_points,
                  scale,
                  alpha=1.0,
                  beta=2.0,
                  text_repr_type='poly',
                  score_thr=0.3,
                  nms_thr=0.1):
    """Decoding predictions of FCENet to instances.

    Args:
        preds (list): Two-element list ``[cls, reg]``. Only the first item
            of each (``preds[0][0]`` and ``preds[1][0]``) is decoded. The
            classification map is channel-first: channels 0:2 give the text
            region scores and channels 2: the text center region scores.
            The regression map is channel-first with the first
            ``2 * fourier_degree + 1`` channels holding the real (x) parts
            of the Fourier coefficients and the rest the imaginary (y)
            parts.
        fourier_degree (int): The maximum Fourier transform degree k.
        num_reconstr_points (int): The points number of the polygon
            reconstructed from predicted Fourier coefficients.
        scale (int): The down-sample scale of the prediction.
        alpha (float) : The parameter to calculate final scores. Score_{final}
                = (Score_{text region} ^ alpha)
                * (Score_{text center region}^ beta)
        beta (float) : The parameter to calculate final score.
        text_repr_type (str):  Boundary encoding type 'poly' or 'quad'.
        score_thr (float) : The threshold used to filter out the final
            candidates.
        nms_thr (float) :  The threshold of nms.

    Returns:
        boundaries (list[list[float]]): The instance boundary and confidence
            list. Each entry is a flat coordinate list followed by its
            score. With ``text_repr_type='quad'`` each entry is the
            minimum-area rectangle (8 values) followed by the score.
    """
    assert isinstance(preds, list)
    assert len(preds) == 2
    assert text_repr_type in ['poly', 'quad']

    cls_pred = preds[0][0]
    tr_pred = cls_pred[0:2]
    tcl_pred = cls_pred[2:]

    reg_pred = preds[1][0].transpose([1, 2, 0])
    x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
    y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]

    score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
    tr_pred_mask = score_pred > score_thr
    tr_mask = fill_hole(tr_pred_mask)

    # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
    # (image, contours, hierarchy); index -2 is the contour list in both.
    tr_contours = cv2.findContours(
        tr_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    mask = np.zeros_like(tr_mask)
    boundaries = []
    for cont in tr_contours:
        # Rasterise the filled contour so only its pixels are decoded.
        deal_map = mask.copy().astype(np.int8)
        cv2.drawContours(deal_map, [cont], -1, 1, -1)

        score_map = score_pred * deal_map
        score_mask = score_map > 0
        if not score_mask.any():
            # No pixel with a positive score inside this contour, so there
            # are no candidates to reconstruct.
            continue

        xy_text = np.argwhere(score_mask)
        # Pixel position as a complex number: column (x) + row (y) * 1j.
        dxy = xy_text[:, 1] + xy_text[:, 0] * 1j

        x, y = x_pred[score_mask], y_pred[score_mask]
        c = x + y * 1j
        # The degree-0 coefficient is predicted relative to the pixel;
        # adding the pixel position makes it absolute.
        c[:, fourier_degree] = c[:, fourier_degree] + dxy
        c *= scale

        polygons = fourier2poly(c, num_reconstr_points)
        score = score_map[score_mask].reshape(-1, 1)
        polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)

        boundaries = boundaries + polygons

    boundaries = poly_nms(boundaries, nms_thr)

    if text_repr_type == 'quad':
        new_boundaries = []
        for boundary in boundaries:
            poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
            score = boundary[-1]
            points = cv2.boxPoints(cv2.minAreaRect(poly))
            # np.int0 (an alias of np.intp) was removed in NumPy 2.0.
            points = points.astype(np.intp)
            new_boundaries.append(points.reshape(-1).tolist() + [score])
        # Previously the converted quads were built and then discarded.
        boundaries = new_boundaries

    return boundaries


class FCEPostProcess(object):
    """
    The post process for FCENet.

    Decodes the per-level network outputs into text boundaries, merges the
    levels with polygon NMS and rescales the result with the factors taken
    from ``shape_list``.
    """

    def __init__(self,
                 scales,
                 fourier_degree=5,
                 num_reconstr_points=50,
                 decoding_type='fcenet',
                 score_thr=0.3,
                 nms_thr=0.1,
                 alpha=1.0,
                 beta=1.0,
                 text_repr_type='poly',
                 **kwargs):
        """Store the decoding configuration.

        Args:
            scales (list): Down-sample scale of each prediction level, in
                the same order as the entries of ``preds`` given to
                ``__call__``.
            fourier_degree (int): The maximum Fourier transform degree k.
            num_reconstr_points (int): Number of points per reconstructed
                polygon.
            decoding_type (str): Stored on the instance; not read by any
                method of this class.
            score_thr (float): Score threshold forwarded to the decoder.
            nms_thr (float): IOU threshold used for polygon NMS.
            alpha (float): Exponent of the text region score.
            beta (float): Exponent of the text center region score.
            text_repr_type (str): Boundary encoding type 'poly' or 'quad'.
            **kwargs: Accepted and ignored.
        """
        self.scales = scales
        self.fourier_degree = fourier_degree
        self.num_reconstr_points = num_reconstr_points
        self.decoding_type = decoding_type
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.alpha = alpha
        self.beta = beta
        self.text_repr_type = text_repr_type

    def __call__(self, preds, shape_list):
        """Split each level into class/regression maps and decode them.

        Args:
            preds (dict): One entry per prediction level. Each value is a
                ``paddle.Tensor`` or array indexed as (N, C, H, W); the
                first 4 channels are the classification maps and the
                remaining channels the regression maps.
            shape_list (ndarray): Indexed as ``shape_list[0, 2:]`` to obtain
                the resize ratios. Presumably rows of
                (h, w, ratio_h, ratio_w) -- confirm against the caller.

        Returns:
            list[dict]: A single-element list holding ``points`` and
                ``scores``.
        """
        score_maps = []
        for value in preds.values():
            if isinstance(value, paddle.Tensor):
                value = value.numpy()
            score_maps.append([value[:, :4, :, :], value[:, 4:, :, :]])

        return self.get_boundary(score_maps, shape_list)

    def resize_boundary(self, boundaries, scale_factor):
        """Rescale boundaries via scale_factor.

        Args:
            boundaries (list[list[float]]): The boundary list. Each boundary
                is a flat list of 2k coordinates followed by one score.
            scale_factor (sequence): Only the first two entries are used:
                the first multiplies every even-indexed coordinate and the
                second every odd-indexed coordinate.

        Returns:
            tuple: ``(boxes, scores)`` where ``boxes`` is a float32 array
                built from the rescaled (k, 2) point arrays and ``scores``
                is the list of boundary scores.
        """
        # NOTE(review): the original called valid_boundary(b, True) here and
        # discarded the result, so nothing was ever enforced. The no-op call
        # is dropped; add an explicit check if malformed boundaries should
        # be rejected.
        factor = np.asarray(scale_factor[:2])

        boxes = []
        scores = []
        for b in boundaries:
            scores.append(b[-1])
            # Broadcasting over the (k, 2) points applies factor[0] to the
            # first column and factor[1] to the second.
            boxes.append(np.array(b[:-1]).reshape([-1, 2]) * factor)

        return np.array(boxes, dtype=np.float32), scores

    def get_boundary(self, score_maps, shape_list):
        """Decode every level, merge with NMS and rescale the boundaries.

        Args:
            score_maps (list): One ``[cls_res, reg_res]`` pair per level;
                must have as many entries as ``self.scales``.
            shape_list (ndarray): See ``__call__``. Only row 0 is read.

        Returns:
            list[dict]: A single-element list holding ``points`` and
                ``scores``.
        """
        assert len(score_maps) == len(self.scales)

        boundaries = []
        for score_map, scale in zip(score_maps, self.scales):
            boundaries = boundaries + self._get_boundary_single(score_map,
                                                                scale)

        # Suppress duplicates detected at different levels.
        boundaries = poly_nms(boundaries, self.nms_thr)

        # Invert the last two ratios of row 0 and reverse them so the factor
        # applied to the first coordinate of each point comes first.
        boundaries, scores = self.resize_boundary(
            boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])

        boxes_batch = [dict(points=boundaries, scores=scores)]
        return boxes_batch

    def _get_boundary_single(self, score_map, scale):
        """Decode the boundaries of one prediction level.

        Args:
            score_map (list): ``[cls_res, reg_res]`` for this level; the
                regression map must have ``4 * fourier_degree + 2``
                channels on axis 1.
            scale (int): Down-sample scale of this level.

        Returns:
            list[list[float]]: Boundaries returned by ``fcenet_decode``.
        """
        assert len(score_map) == 2
        assert score_map[1].shape[1] == 4 * self.fourier_degree + 2

        return fcenet_decode(
            preds=score_map,
            fourier_degree=self.fourier_degree,
            num_reconstr_points=self.num_reconstr_points,
            scale=scale,
            alpha=self.alpha,
            beta=self.beta,
            text_repr_type=self.text_repr_type,
            score_thr=self.score_thr,
            nms_thr=self.nms_thr)
add fcenet 2022-01-27 17:36:19 +08:00			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`import numpy as np`
			`import cv2`
			`import paddle`
			`from numpy.fft import ifft`
			`import Polygon as plg`


			`def points2polygon(points):`
			`"""Convert k points to 1 polygon.`

			`Args:`
			`points (ndarray or list): A ndarray or a list of shape (2k)`
			`that indicates k points.`

			`Returns:`
			`polygon (Polygon): A polygon object.`
			`"""`
			`if isinstance(points, list):`
			`points = np.array(points)`

			`assert isinstance(points, np.ndarray)`
			`assert (points.size % 2 == 0) and (points.size >= 8)`

			`point_mat = points.reshape([-1, 2])`
			`return plg.Polygon(point_mat)`


			`def poly_intersection(poly_det, poly_gt):`
			`"""Calculate the intersection area between two polygon.`

			`Args:`
			`poly_det (Polygon): A polygon predicted by detector.`
			`poly_gt (Polygon): A gt polygon.`

			`Returns:`
			`intersection_area (float): The intersection area between two polygons.`
			`"""`
			`assert isinstance(poly_det, plg.Polygon)`
			`assert isinstance(poly_gt, plg.Polygon)`

			`poly_inter = poly_det & poly_gt`
			`if len(poly_inter) == 0:`
			`return 0, poly_inter`
			`return poly_inter.area(), poly_inter`


			`def poly_union(poly_det, poly_gt):`
			`"""Calculate the union area between two polygon.`

			`Args:`
			`poly_det (Polygon): A polygon predicted by detector.`
			`poly_gt (Polygon): A gt polygon.`

			`Returns:`
			`union_area (float): The union area between two polygons.`
			`"""`
			`assert isinstance(poly_det, plg.Polygon)`
			`assert isinstance(poly_gt, plg.Polygon)`

			`area_det = poly_det.area()`
			`area_gt = poly_gt.area()`
			`area_inters, _ = poly_intersection(poly_det, poly_gt)`
			`return area_det + area_gt - area_inters`


			`def valid_boundary(x, with_score=True):`
			`num = len(x)`
			`if num < 8:`
			`return False`
			`if num % 2 == 0 and (not with_score):`
			`return True`
			`if num % 2 == 1 and with_score:`
			`return True`

			`return False`


			`def boundary_iou(src, target):`
			`"""Calculate the IOU between two boundaries.`

			`Args:`
			`src (list): Source boundary.`
			`target (list): Target boundary.`

			`Returns:`
			`iou (float): The iou between two boundaries.`
			`"""`
			`assert valid_boundary(src, False)`
			`assert valid_boundary(target, False)`
			`src_poly = points2polygon(src)`
			`target_poly = points2polygon(target)`

			`return poly_iou(src_poly, target_poly)`


			`def poly_iou(poly_det, poly_gt):`
			`"""Calculate the IOU between two polygons.`

			`Args:`
			`poly_det (Polygon): A polygon predicted by detector.`
			`poly_gt (Polygon): A gt polygon.`

			`Returns:`
			`iou (float): The IOU between two polygons.`
			`"""`
			`assert isinstance(poly_det, plg.Polygon)`
			`assert isinstance(poly_gt, plg.Polygon)`
			`area_inters, _ = poly_intersection(poly_det, poly_gt)`
			`area_union = poly_union(poly_det, poly_gt)`
			`if area_union == 0:`
			`return 0.0`
			`return area_inters / area_union`


			`def poly_nms(polygons, threshold):`
			`assert isinstance(polygons, list)`

			`polygons = np.array(sorted(polygons, key=lambda x: x[-1]))`

			`keep_poly = []`
			`index = [i for i in range(polygons.shape[0])]`

			`while len(index) > 0:`
			`keep_poly.append(polygons[index[-1]].tolist())`
			`A = polygons[index[-1]][:-1]`
			`index = np.delete(index, -1)`

			`iou_list = np.zeros((len(index), ))`
			`for i in range(len(index)):`
			`B = polygons[index[i]][:-1]`

			`iou_list[i] = boundary_iou(A, B)`
			`remove_index = np.where(iou_list > threshold)`
			`index = np.delete(index, remove_index)`

			`return keep_poly`


			`def fill_hole(input_mask):`
			`h, w = input_mask.shape`
			`canvas = np.zeros((h + 2, w + 2), np.uint8)`
			`canvas[1:h + 1, 1:w + 1] = input_mask.copy()`

			`mask = np.zeros((h + 4, w + 4), np.uint8)`

			`cv2.floodFill(canvas, mask, (0, 0), 1)`
			`canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)`

			`return ~canvas | input_mask`


			`def fourier2poly(fourier_coeff, num_reconstr_points=50):`
			`""" Inverse Fourier transform`
			`Args:`
			`fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1),`
			`with n and k being candidates number and Fourier degree`
			`respectively.`
			`num_reconstr_points (int): Number of reconstructed polygon points.`
			`Returns:`
			`Polygons (ndarray): The reconstructed polygons shaped (n, n')`
			`"""`

			`a = np.zeros((len(fourier_coeff), num_reconstr_points), dtype='complex')`
			`k = (len(fourier_coeff[0]) - 1) // 2`

			`a[:, 0:k + 1] = fourier_coeff[:, k:]`
			`a[:, -k:] = fourier_coeff[:, :k]`

			`poly_complex = ifft(a) * num_reconstr_points`
			`polygon = np.zeros((len(fourier_coeff), num_reconstr_points, 2))`
			`polygon[:, :, 0] = poly_complex.real`
			`polygon[:, :, 1] = poly_complex.imag`
			`return polygon.astype('int32').reshape((len(fourier_coeff), -1))`


			`def fcenet_decode(preds,`
			`fourier_degree,`
			`num_reconstr_points,`
			`scale,`
			`alpha=1.0,`
			`beta=2.0,`
			`text_repr_type='poly',`
			`score_thr=0.3,`
			`nms_thr=0.1):`
			`"""Decoding predictions of FCENet to instances.`

			`Args:`
			`preds (list(Tensor)): The head output tensors.`
			`fourier_degree (int): The maximum Fourier transform degree k.`
			`num_reconstr_points (int): The points number of the polygon`
			`reconstructed from predicted Fourier coefficients.`
			`scale (int): The down-sample scale of the prediction.`
			`alpha (float) : The parameter to calculate final scores. Score_{final}`
			`= (Score_{text region} ^ alpha)`
			`* (Score_{text center region}^ beta)`
			`beta (float) : The parameter to calculate final score.`
			`text_repr_type (str): Boundary encoding type 'poly' or 'quad'.`
			`score_thr (float) : The threshold used to filter out the final`
			`candidates.`
			`nms_thr (float) : The threshold of nms.`

			`Returns:`
			`boundaries (list[list[float]]): The instance boundary and confidence`
			`list.`
			`"""`
			`assert isinstance(preds, list)`
			`assert len(preds) == 2`
			`assert text_repr_type in ['poly', 'quad']`

			`# import pdb;pdb.set_trace()`
			`cls_pred = preds[0][0]`
			`# tr_pred = F.softmax(cls_pred[0:2], axis=0).cpu().numpy()`
			`# tcl_pred = F.softmax(cls_pred[2:], axis=0).cpu().numpy()`

			`tr_pred = cls_pred[0:2]`
			`tcl_pred = cls_pred[2:]`

			`reg_pred = preds[1][0].transpose([1, 2, 0]) #.cpu().numpy()`
			`x_pred = reg_pred[:, :, :2 * fourier_degree + 1]`
			`y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]`

			`score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)`
			`tr_pred_mask = (score_pred) > score_thr`
			`tr_mask = fill_hole(tr_pred_mask)`

			`tr_contours, _ = cv2.findContours(`
			`tr_mask.astype(np.uint8), cv2.RETR_TREE,`
			`cv2.CHAIN_APPROX_SIMPLE) # opencv4`

			`mask = np.zeros_like(tr_mask)`
			`boundaries = []`
			`for cont in tr_contours:`
			`deal_map = mask.copy().astype(np.int8)`
			`cv2.drawContours(deal_map, [cont], -1, 1, -1)`

			`score_map = score_pred * deal_map`
			`score_mask = score_map > 0`
			`xy_text = np.argwhere(score_mask)`
			`dxy = xy_text[:, 1] + xy_text[:, 0] * 1j`

			`x, y = x_pred[score_mask], y_pred[score_mask]`
			`c = x + y * 1j`
			`c[:, fourier_degree] = c[:, fourier_degree] + dxy`
			`c *= scale`

			`polygons = fourier2poly(c, num_reconstr_points)`
			`score = score_map[score_mask].reshape(-1, 1)`
			`polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)`

			`boundaries = boundaries + polygons`

			`boundaries = poly_nms(boundaries, nms_thr)`

			`if text_repr_type == 'quad':`
			`new_boundaries = []`
			`for boundary in boundaries:`
			`poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)`
			`score = boundary[-1]`
			`points = cv2.boxPoints(cv2.minAreaRect(poly))`
			`points = np.int0(points)`
			`new_boundaries.append(points.reshape(-1).tolist() + [score])`

			`return boundaries`


			`class FCEPostProcess(object):`
			`"""`
			`The post process for FCENet.`
			`"""`

			`def __init__(self,`
			`scales,`
			`fourier_degree=5,`
			`num_reconstr_points=50,`
			`decoding_type='fcenet',`
			`score_thr=0.3,`
			`nms_thr=0.1,`
			`alpha=1.0,`
			`beta=1.0,`
			`text_repr_type='poly',`
			`**kwargs):`

			`self.scales = scales`
			`self.fourier_degree = fourier_degree`
			`self.num_reconstr_points = num_reconstr_points`
			`self.decoding_type = decoding_type`
			`self.score_thr = score_thr`
			`self.nms_thr = nms_thr`
			`self.alpha = alpha`
			`self.beta = beta`
			`self.text_repr_type = text_repr_type`

			`def __call__(self, preds, shape_list):`
			`score_maps = []`
			`for key, value in preds.items():`
			`if isinstance(value, paddle.Tensor):`
			`value = value.numpy()`
			`cls_res = value[:, :4, :, :]`
			`reg_res = value[:, 4:, :, :]`
			`score_maps.append([cls_res, reg_res])`

			`return self.get_boundary(score_maps, shape_list)`

			`def resize_boundary(self, boundaries, scale_factor):`
			`"""Rescale boundaries via scale_factor.`

			`Args:`
			`boundaries (list[list[float]]): The boundary list. Each boundary`
			`with size 2k+1 with k>=4.`
			`scale_factor(ndarray): The scale factor of size (4,).`

			`Returns:`
			`boundaries (list[list[float]]): The scaled boundaries.`
			`"""`
			`# assert check_argument.is_2dlist(boundaries)`
			`# assert isinstance(scale_factor, np.ndarray)`
			`# assert scale_factor.shape[0] == 4`

			`boxes = []`
			`scores = []`
			`for b in boundaries:`
			`sz = len(b)`
			`valid_boundary(b, True)`
			`scores.append(b[-1])`
			`b = (np.array(b[:sz - 1]) *`
			`(np.tile(scale_factor[:2], int(`
			`(sz - 1) / 2)).reshape(1, sz - 1))).flatten().tolist()`
			`boxes.append(np.array(b).reshape([-1, 2]))`

			`return np.array(boxes, dtype=np.float32), scores`

			`def get_boundary(self, score_maps, shape_list):`
			`assert len(score_maps) == len(self.scales)`
			`# import pdb;pdb.set_trace()`
			`boundaries = []`
			`for idx, score_map in enumerate(score_maps):`
			`scale = self.scales[idx]`
			`boundaries = boundaries + self._get_boundary_single(score_map,`
			`scale)`

			`# nms`
			`boundaries = poly_nms(boundaries, self.nms_thr)`
			`# if rescale:`
			`# import pdb;pdb.set_trace()`
			`boundaries, scores = self.resize_boundary(`
			`boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])`

			`boxes_batch = [dict(points=boundaries, scores=scores)]`
			`return boxes_batch`

			`def _get_boundary_single(self, score_map, scale):`
			`assert len(score_map) == 2`
			`assert score_map[1].shape[1] == 4 * self.fourier_degree + 2`

			`return fcenet_decode(`
			`preds=score_map,`
			`fourier_degree=self.fourier_degree,`
			`num_reconstr_points=self.num_reconstr_points,`
			`scale=scale,`
			`alpha=self.alpha,`
			`beta=self.beta,`
			`text_repr_type=self.text_repr_type,`
			`score_thr=self.score_thr,`
			`nms_thr=self.nms_thr)`