PaddleOCR/deploy/slim/auto_compression/ppocrv4_det_server_dataset_process.py

34 lines
1.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import cv2
dataset_path = "datasets/v4_4_test_dataset"
annotation_file = "datasets/v4_4_test_dataset/label.txt"
small_images_path = "datasets/v4_4_test_dataset_small"
new_annotation_file = "datasets/v4_4_test_dataset_small/label.txt"
os.makedirs(small_images_path, exist_ok=True)
with open(annotation_file, "r") as f:
lines = f.readlines()
for i, line in enumerate(lines):
image_name = line.split(" ")[0]
image_path = os.path.join(dataset_path, image_name)
try:
image = cv2.imread(image_path)
height, width, _ = image.shape
# 如果图像的宽度和高度都小于2000而且长宽比小于2将其复制到新的文件夹并保存其标注信息
if height < 2000 and width < 2000:
if max(height, width) / min(height, width) < 2:
print(i, height, width, image_path)
small_image_path = os.path.join(small_images_path, image_name)
cv2.imwrite(small_image_path, image)
with open(new_annotation_file, "a") as f:
f.write(f"{line}")
except:
continue