mirror of
				https://github.com/PaddlePaddle/PaddleOCR.git
				synced 2025-10-25 23:04:56 +00:00 
			
		
		
		
	 af87691591
			
		
	
	
		af87691591
		
			
		
	
	
	
	
		
			
			* add ci for paddleocr test * fix flake8 error * fix paddlepaddle deps * add dep * fix * move flake8 to pre-commit * update ut * fix bug * fix bug set paddlepaddle==2.5 * fix bug * fix bug * fix bug * update test * remove lscpu
		
			
				
	
	
		
			309 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			309 lines
		
	
	
		
			9.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| # @Time    : 2019/8/23 21:52
 | ||
| # @Author  : zhoujun
 | ||
| 
 | ||
| import math
 | ||
| import numbers
 | ||
| import random
 | ||
| 
 | ||
| import cv2
 | ||
| import numpy as np
 | ||
| from skimage.util import random_noise
 | ||
| 
 | ||
| 
 | ||
class RandomNoise:
    """Randomly add Gaussian noise to the image of a sample."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict):
        """
        Add Gaussian noise to ``data['img']``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict; ``data['img']`` is replaced when the transform is applied
        """
        skip = random.random() > self.random_rate
        if skip:
            return data

        source = data["img"]
        noisy = random_noise(source, mode="gaussian", clip=True)
        # The noisy result is scaled by 255 and cast back to the input dtype.
        data["img"] = (noisy * 255).astype(source.dtype)
        return data
 | ||
| 
 | ||
| 
 | ||
class RandomScale:
    """Randomly rescale the image and its text polygons by one of several factors."""

    def __init__(self, scales, random_rate):
        """
        :param scales: candidate scale factors; one is drawn with ``np.random.choice``
            each time the transform is applied
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        """
        self.random_rate = random_rate
        self.scales = scales

    def __call__(self, data: dict) -> dict:
        """
        Pick one factor from ``scales`` and scale both the image and the text polygons by it.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced when the transform is applied
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        rd_scale = float(np.random.choice(self.scales))
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        # Multiply out of place: an in-place ``*=`` by a float raises a casting
        # error when the polygons are stored with an integer dtype. This also
        # leaves the caller's original array untouched, as the old copy did.
        scaled_polys = text_polys * rd_scale

        data["img"] = im
        data["text_polys"] = scaled_polys
        return data
 | ||
| 
 | ||
| 
 | ||
class RandomRotateImgBox:
    """Randomly rotate the image and map the text polygons through the same affine transform."""

    def __init__(self, degrees, random_rate, same_size=False):
        """
        :param degrees: rotation range in degrees; a single non-negative number ``d``
            means ``(-d, d)``, a list/tuple/np.ndarray of length 2 is used as ``(low, high)``
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        :param same_size: if True the output canvas keeps the input width/height,
            otherwise it is enlarged to the bounding size of the rotated image
        :raises ValueError: if ``degrees`` is a negative number or a sequence whose length is not 2
        :raises TypeError: if ``degrees`` is neither a number nor a list/tuple/np.ndarray
        """
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            degrees = (-degrees, degrees)
        elif isinstance(degrees, (list, tuple, np.ndarray)):
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
        else:
            # TypeError is a subclass of Exception, so callers that caught the
            # previous generic Exception keep working.
            raise TypeError("degrees must in Number or list or tuple or np.ndarray")
        self.degrees = degrees
        self.same_size = same_size
        self.random_rate = random_rate

    @staticmethod
    def _rotate_polys(rot_mat, text_polys):
        """Apply the 2x3 affine matrix ``rot_mat`` to every (x, y) point of ``text_polys``."""
        polys = np.asarray(text_polys, dtype=np.float64)
        if polys.size == 0:
            # Keep a 3-D layout for an empty polygon set so later
            # ``polys[:, :, 0]`` style indexing keeps working.
            shape = polys.shape if polys.ndim == 3 else (0, 4, 2)
            return np.zeros(shape, dtype=np.float64)
        # [x, y, 1] . rot_mat^T  ==  [x, y] . A^T + t  for rot_mat = [A | t]
        return polys[..., :2] @ rot_mat[:, :2].T + rot_mat[:, 2]

    def __call__(self, data: dict) -> dict:
        """
        Rotate the image by a random angle drawn from ``degrees`` and transform the text polygons accordingly.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced when the transform is applied
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        # ---------------------- rotate the image ----------------------
        w = im.shape[1]
        h = im.shape[0]
        angle = np.random.uniform(self.degrees[0], self.degrees[1])

        if self.same_size:
            nw = w
            nh = h
        else:
            # degrees -> radians
            rangle = np.deg2rad(angle)
            # width / height of the rotated image's bounding box
            nw = abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)
            nh = abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)
        # build the affine matrix (rotation about the centre of the new canvas)
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
        # offset from the original image centre to the new canvas centre
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # fold that offset into the translation part of the affine matrix
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # affine warp
        rot_img = cv2.warpAffine(
            im,
            rot_mat,
            (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4,
        )

        # ---------------------- correct the polygon coordinates ----------------------
        # rot_mat is the final transform; map every polygon point into the
        # rotated coordinate system.
        data["img"] = rot_img
        data["text_polys"] = self._rotate_polys(rot_mat, text_polys)
        return data
 | ||
| 
 | ||
| 
 | ||
class RandomResize:
    """Randomly resize the image (and its text polygons) to a fixed target size."""

    def __init__(self, size, random_rate, keep_ratio=False):
        """
        :param size: target size; a single number ``s`` means ``(s, s)``, a
            list/tuple/np.ndarray of length 2 is interpreted as ``[w, h]``
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        :param keep_ratio: if True, zero-pad the image on the bottom/right up to at
            least the target size before resizing
        :raises ValueError: if ``size`` is a negative number or a sequence whose length is not 2
        :raises TypeError: if ``size`` is neither a number nor a list/tuple/np.ndarray
        """
        if isinstance(size, numbers.Number):
            if size < 0:
                raise ValueError(
                    "If input_size is a single number, it must be positive."
                )
            size = (size, size)
        elif isinstance(size, (list, tuple, np.ndarray)):
            if len(size) != 2:
                raise ValueError("If input_size is a sequence, it must be of len 2.")
            size = (size[0], size[1])
        else:
            # TypeError is a subclass of Exception, so callers that caught the
            # previous generic Exception keep working.
            raise TypeError("input_size must in Number or list or tuple or np.ndarray")
        self.size = size
        self.keep_ratio = keep_ratio
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Resize the image to ``self.size`` (w, h) and scale the text polygons to match.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced when the transform is applied
        """
        if random.random() > self.random_rate:
            return data
        im = data["img"]
        text_polys = data["text_polys"]

        if self.keep_ratio:
            # Zero-pad on the bottom/right so the canvas is at least the target size.
            # ``self.size`` is (w, h): height is compared with size[1] and width
            # with size[0], matching cv2.resize and the scale factors below.
            h, w, c = im.shape
            max_h = max(h, self.size[1])
            max_w = max(w, self.size[0])
            # Preserve the image dtype instead of forcing uint8.
            im_padded = np.zeros((max_h, max_w, c), dtype=im.dtype)
            im_padded[:h, :w] = im
            im = im_padded
        text_polys = text_polys.astype(np.float32)
        h, w, _ = im.shape
        im = cv2.resize(im, self.size)
        w_scale = self.size[0] / float(w)
        h_scale = self.size[1] / float(h)
        text_polys[:, :, 0] *= w_scale
        text_polys[:, :, 1] *= h_scale

        data["img"] = im
        data["text_polys"] = text_polys
        return data
 | ||
| 
 | ||
| 
 | ||
def resize_image(img, short_size):
    """
    Resize ``img`` so its shorter side is about ``short_size``, with both sides rounded to a multiple of 32.

    :param img: image array whose shape unpacks as (height, width, channels)
    :param short_size: target length of the shorter side before rounding
    :return: (resized image, (width scale, height scale)) where the scales are
        new size divided by original size
    """
    rows, cols, _ = img.shape
    if rows < cols:
        target_rows = short_size
        target_cols = target_rows / rows * cols
    else:
        target_cols = short_size
        target_rows = target_cols / cols * rows

    # Round each side to the nearest multiple of 32.
    target_rows, target_cols = (
        int(round(side / 32) * 32) for side in (target_rows, target_cols)
    )

    resized = cv2.resize(img, (target_cols, target_rows))
    scale_xy = (target_cols / cols, target_rows / rows)
    return resized, scale_xy
 | ||
| 
 | ||
| 
 | ||
class ResizeShortSize:
    """Upscale the image (and optionally its text polygons) so the shorter side is at least ``short_size``."""

    def __init__(self, short_size, resize_text_polys=True):
        """
        :param short_size: minimum length required for the shorter image side
        :param resize_text_polys: whether the text polygons are scaled together with the image
        """
        self.short_size = short_size
        self.resize_text_polys = resize_text_polys

    def __call__(self, data: dict) -> dict:
        """
        Scale the image and the text polygons when the shorter side is below ``short_size``.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict; 'img' and 'text_polys' are replaced only when a resize happened
        """
        im = data["img"]
        text_polys = data["text_polys"]

        h, w, _ = im.shape
        short_edge = min(h, w)
        if short_edge < self.short_size:
            # guarantee short side >= short_size
            scale = self.short_size / short_edge
            im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
            if self.resize_text_polys:
                # The factor is the same on both axes, so scale every coordinate
                # of every point. Indexing ``[:, 0]`` / ``[:, 1]`` would select the
                # first / second *point* of each polygon and leave the remaining
                # points unscaled. Multiplying out of place also avoids mutating
                # the caller's array and integer-dtype casting errors.
                text_polys = text_polys * scale

        data["img"] = im
        data["text_polys"] = text_polys
        return data
 | ||
| 
 | ||
| 
 | ||
class HorizontalFlip:
    """Randomly mirror the image left-to-right together with its text polygons."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip the image horizontally and mirror the x coordinate of every polygon point.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced when the transform is applied
        """
        skip = random.random() > self.random_rate
        if skip:
            return data

        source_img = data["img"]
        # Work on a copy so the caller's polygon array is left untouched.
        mirrored_polys = data["text_polys"].copy()

        mirrored_img = cv2.flip(source_img, 1)
        _, width, _ = mirrored_img.shape
        mirrored_polys[:, :, 0] = width - mirrored_polys[:, :, 0]

        data["img"] = mirrored_img
        data["text_polys"] = mirrored_polys
        return data
 | ||
| 
 | ||
| 
 | ||
class VerticallFlip:
    """Randomly mirror the image top-to-bottom together with its text polygons."""

    def __init__(self, random_rate):
        """
        :param random_rate: threshold compared against ``random.random()``;
            the transform is skipped whenever the drawn value exceeds it
        """
        self.random_rate = random_rate

    def __call__(self, data: dict) -> dict:
        """
        Flip the image vertically and mirror the y coordinate of every polygon point.

        :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
        :return: the same dict with 'img' and 'text_polys' replaced when the transform is applied
        """
        skip = random.random() > self.random_rate
        if skip:
            return data

        source_img = data["img"]
        # Work on a copy so the caller's polygon array is left untouched.
        mirrored_polys = data["text_polys"].copy()

        mirrored_img = cv2.flip(source_img, 0)
        height, _, _ = mirrored_img.shape
        mirrored_polys[:, :, 1] = height - mirrored_polys[:, :, 1]

        data["img"] = mirrored_img
        data["text_polys"] = mirrored_polys
        return data
 |