File size: 5,385 Bytes
0b4516f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import math
import os
import os.path as osp
import mmcv
import mmengine
import numpy as np
from mmocr.utils import crop_img, dump_ocr_data
def parse_args(argv=None):
    """Parse the command-line arguments of the IMGUR converter.

    Args:
        argv (list[str], optional): Argument strings to parse. When ``None``
            (the default) argparse reads them from ``sys.argv[1:]``, which is
            the behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace: Namespace with a single attribute ``root_path``.
    """
    parser = argparse.ArgumentParser(
        description='Generate training, validation and test set of IMGUR ')
    parser.add_argument('root_path', help='Root dir path of IMGUR')
    return parser.parse_args(argv)
def collect_imgur_info(root_path, annotation_filename, print_every=1000):
    """Collect per-image annotation records from an IMGUR annotation file.

    The annotation file is read from
    ``<root_path>/annotations/<annotation_filename>`` and must provide the
    keys ``index_to_ann_map`` (image name -> list of annotation ids) and
    ``ann_id`` (annotation id -> annotation with ``bounding_box`` and
    ``word``). Images are looked up as ``<root_path>/imgs/<name>.jpg``.

    Args:
        root_path (str): Root directory of the dataset.
        annotation_filename (str): File name inside the ``annotations``
            directory.
        print_every (int): Print a progress line every ``print_every``
            images. Values ``<= 0`` disable the progress output.

    Returns:
        list[dict]: One dict per readable image with the keys ``file_name``,
        ``height``, ``width`` and ``anno_info``. ``anno_info`` is a list of
        ``dict(bbox=<8 polygon coordinates>, word=<str>)``.

    Raises:
        FileNotFoundError: If the annotation file does not exist.
    """
    annotation_path = osp.join(root_path, 'annotations', annotation_filename)
    if not osp.exists(annotation_path):
        raise FileNotFoundError(
            f'{annotation_path} not exists, please check and try again.')

    annotation = mmengine.load(annotation_path)
    index_to_ann_map = annotation['index_to_ann_map']
    ann_by_id = annotation['ann_id']
    total = len(index_to_ann_map)

    img_infos = []
    for i, (img_name, ann_ids) in enumerate(index_to_ann_map.items()):
        if print_every > 0 and i % print_every == 0:
            print(f'{i}/{total}')

        file_name = img_name + '.jpg'
        img_path = osp.join(root_path, 'imgs', file_name)

        # Skip not exist images
        if not osp.exists(img_path):
            continue

        img = mmcv.imread(img_path, 'unchanged')

        # Skip broken images
        if img is None:
            continue

        anno_info = []
        for ann_id in ann_ids:
            ann = ann_by_id[ann_id]

            # The original annotation is oriented rects [x, y, w, h, a]
            # NOTE(review): the slice drops the first character and the last
            # two characters of the raw string before parsing -- presumably
            # the surrounding brackets plus one trailing character; confirm
            # against the annotation format.
            box = np.fromstring(
                ann['bounding_box'][1:-2], sep=',', dtype=float)
            anno_info.append(
                dict(bbox=convert_oriented_box(box), word=ann['word']))

        img_infos.append(
            dict(
                file_name=file_name,
                height=img.shape[0],
                width=img.shape[1],
                anno_info=anno_info))

    return img_infos
def convert_oriented_box(box):
    """Convert an oriented rectangle into an 8-value polygon.

    Args:
        box: Sequence whose first five entries are
            ``[x_ctr, y_ctr, width, height, angle]``. ``angle`` is given in
            degrees; it is negated and converted to radians before use.

    Returns:
        list[float]: ``[x0, y0, x1, y1, x2, y2, x3, y3]`` -- the four rotated
        corners, reordered by :func:`get_best_begin_point_single`.
    """
    x_ctr, y_ctr, width, height, angle = box[:5]
    angle = -angle * math.pi / 180

    # Corners of the un-rotated rectangle centred on the origin; row 0 holds
    # the x coordinates and row 1 the y coordinates.
    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])

    # 2x2 rotation matrix applied to all four corners at once.
    R = np.array([[np.cos(angle), -np.sin(angle)],
                  [np.sin(angle), np.cos(angle)]])
    poly = R.dot(rect)

    # Translate back to the rectangle centre and interleave x/y.
    x0, x1, x2, x3 = poly[0, :4] + x_ctr
    y0, y1, y2, y3 = poly[1, :4] + y_ctr
    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
    poly = get_best_begin_point_single(poly)

    return poly.tolist()
def get_best_begin_point_single(coordinate):
    """Pick the starting vertex of a quadrilateral.

    The cyclic order of the four vertices is kept; only the vertex listed
    first changes. Each of the four rotations is scored by the summed
    distance between its vertices and the corners of the axis-aligned
    bounding rectangle, taken in the order ``(xmin, ymin)``,
    ``(xmax, ymin)``, ``(xmax, ymax)``, ``(xmin, ymax)``. The rotation with
    the smallest score is returned; ties go to the earliest rotation.

    Args:
        coordinate: Eight values ``[x1, y1, x2, y2, x3, y3, x4, y4]``.

    Returns:
        numpy.ndarray: The reordered vertices flattened to shape ``(8,)``.
        A result is returned for every input, including the case where the
        given order is already the best one.
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
    points = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]

    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]

    # The four cyclic rotations of the vertex list.
    combine = [points[shift:] + points[:shift] for shift in range(4)]

    def total_distance(candidate):
        return sum(
            cal_line_length(vertex, corner)
            for vertex, corner in zip(candidate, dst_coordinate))

    # min() keeps the first minimum, i.e. the same tie-breaking as a strict
    # ``<`` comparison in an explicit loop.
    force_flag = min(range(4), key=lambda idx: total_distance(combine[idx]))

    return np.array(combine[force_flag]).reshape(8)
def cal_line_length(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: Indexable whose items ``[0]`` and ``[1]`` are x and y.
        point2: Indexable whose items ``[0]`` and ``[1]`` are x and y.

    Returns:
        float: Distance between the two points.
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
def generate_ann(root_path, split, image_infos):
    """Crop every annotated word and dump the recognition label file.

    Crops are written to ``<root_path>/crops/<split>/`` as
    ``<image stem>_<index>.png`` and the labels to
    ``<root_path>/<split>_label.json``.

    Args:
        root_path (str): Root directory of the dataset; source images are
            read from ``<root_path>/imgs``.
        split (str): Split name used for the output directory and label
            file name.
        image_infos (list[dict]): Records with the keys ``file_name`` and
            ``anno_info``; every annotation provides ``bbox`` and ``word``.
    """
    dst_image_root = osp.join(root_path, 'crops', split)
    dst_label_file = osp.join(root_path, f'{split}_label.json')
    os.makedirs(dst_image_root, exist_ok=True)

    img_info = []
    for image_info in image_infos:
        src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
        image = mmcv.imread(src_img_path)
        # splitext removes only the extension, so names that contain extra
        # dots keep their full stem and cannot collide with each other.
        src_img_root = osp.splitext(image_info['file_name'])[0]

        # Crops are numbered from 1 per source image; skipped annotations do
        # not consume a number.
        index = 1
        for anno in image_info['anno_info']:
            word = anno['word']
            dst_img = crop_img(image, anno['bbox'], 0, 0)

            # Skip invalid annotations
            if min(dst_img.shape) == 0:
                continue

            dst_img_name = f'{src_img_root}_{index}.png'
            index += 1
            dst_img_path = osp.join(dst_image_root, dst_img_name)
            mmcv.imwrite(dst_img, dst_img_path)

            img_info.append({
                'file_name': dst_img_name,
                'anno_info': [{
                    'text': word
                }]
            })

    dump_ocr_data(img_info, dst_label_file, 'textrecog')
def main():
    """Convert the train, val and test splits of IMGUR.

    For each split the annotation file ``imgur5k_annotations_<split>.json``
    is collected and the crops and label file are generated; both steps run
    inside one timer whose elapsed time is printed per split.
    """
    args = parse_args()
    root_path = args.root_path

    for split in ['train', 'val', 'test']:
        print(f'Processing {split} set...')
        with mmengine.Timer(
                print_tmpl='It takes {}s to convert IMGUR annotation'):
            anno_infos = collect_imgur_info(
                root_path, f'imgur5k_annotations_{split}.json')
            generate_ann(root_path, split, anno_infos)
# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()