|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import cv2 |
|
import random |
|
import pyclipper |
|
import paddle |
|
|
|
import numpy as np |
|
import Polygon as plg |
|
import scipy.io as scio |
|
|
|
from PIL import Image |
|
import paddle.vision.transforms as transforms |
|
|
|
|
|
class RandomScale():
    """Rescale ``data['image']`` by a randomly chosen ratio of ``short_size``.

    The ratio is drawn from a fixed set (0.7 ... 1.3); the image's shorter
    side is scaled towards ``ratio * short_size`` and both sides are then
    rounded up to a multiple of 32.
    """

    def __init__(self, short_size=640, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.short_size = short_size

    def scale_aligned(self, img, scale):
        """Resize ``img`` by ``scale`` with both sides aligned to 32.

        Args:
            img: Array whose first two dimensions are (height, width).
            scale: Multiplicative factor applied to both sides.

        Returns:
            Tuple ``(resized, factor_h, factor_w)`` where the factors are the
            effective ratios new/old after the round-up to a multiple of 32.
        """
        src_h, src_w = img.shape[0:2]
        dst_h = int(src_h * scale + 0.5)
        dst_w = int(src_w * scale + 0.5)
        # ``-x % 32`` is the distance from x up to the next multiple of 32
        # (zero when x is already aligned).
        dst_h += -dst_h % 32
        dst_w += -dst_w % 32
        # cv2.resize takes dsize as (width, height).
        resized = cv2.resize(img, dsize=(dst_w, dst_h))
        return resized, dst_h / src_h, dst_w / src_w

    def __call__(self, data):
        """Rescale the image and record the applied factors.

        Reads ``data['image']``; writes ``data['image']`` (resized) and
        ``data['scale_factor']`` as ``(factor_w, factor_h)``.
        """
        image = data['image']
        height, width = image.shape[0:2]

        ratios = np.array([0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3])
        picked = np.random.choice(ratios)
        scale = (picked * self.short_size) / min(height, width)
        resized, factor_h, factor_w = self.scale_aligned(image, scale)

        # Note the (w, h) order: x-factor first, then y-factor.
        data['scale_factor'] = (factor_w, factor_h)
        data['image'] = resized
        return data
|
|
|
|
|
class MakeShrink():
    """Rasterise text polygons into instance, kernel and training-mask maps.

    ``__call__`` replaces ``data['image']`` with a list of seven arrays:
    image, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
    gt_kernel_inner and training_mask_distance.
    """

    def __init__(self, kernel_scale=0.7, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.kernel_scale = kernel_scale

    def dist(self, a, b):
        """Return the L2 norm of ``a - b`` taken along axis 0."""
        delta = a - b
        return np.linalg.norm(delta, ord=2, axis=0)

    def perimeter(self, bbox):
        """Return the summed edge lengths of the closed polygon ``bbox``.

        ``bbox`` is indexed by vertex along its first axis; the last vertex
        is joined back to the first.
        """
        count = bbox.shape[0]
        edges = (self.dist(bbox[k], bbox[(k + 1) % count])
                 for k in range(count))
        return sum(edges, 0.0)

    def shrink(self, bboxes, rate, max_shr=20):
        """Shrink each polygon inwards with pyclipper.

        The inward offset is ``area * (1 - rate**2) / perimeter`` (rounded),
        capped at ``max_shr``. Whenever the offsetting raises, yields no
        path, or yields a path with two or fewer vertices, the original
        polygon is kept instead.

        Returns:
            A list with one polygon per input polygon.
        """
        squared_rate = rate * rate
        results = []
        for poly in bboxes:
            area = plg.Polygon(poly).area()
            peri = self.perimeter(poly)

            # Start from the fallback and only replace it on success.
            kernel = poly
            try:
                offsetter = pyclipper.PyclipperOffset()
                offsetter.AddPath(poly, pyclipper.JT_ROUND,
                                  pyclipper.ET_CLOSEDPOLYGON)
                offset = min(
                    int(area * (1 - squared_rate) / (peri + 0.001) + 0.5),
                    max_shr)
                paths = offsetter.Execute(-offset)
                if len(paths) != 0:
                    candidate = np.array(paths[0])
                    if candidate.shape[0] > 2:
                        kernel = candidate
            except Exception:
                # Best effort: any failure keeps the unshrunk polygon.
                kernel = poly
            results.append(kernel)

        return results

    def __call__(self, data):
        """Build the label maps for one sample.

        Reads ``data['image']``, ``data['polys']``, ``data['texts']`` and
        ``data['scale_factor']``. The entries of ``data['polys']`` are
        rescaled and reshaped to ``(n_points, 2)`` int32 arrays in place.
        """
        image = data['image']
        polys = data['polys']
        texts = data['texts']
        scale_factor = data['scale_factor']
        factor_x, factor_y = scale_factor[0], scale_factor[1]

        canvas = image.shape[0:2]
        # NOTE(review): labels are stored as uint8, so this presumably
        # assumes at most 255 polygons per image -- confirm with callers.
        gt_instance = np.zeros(canvas, dtype='uint8')
        training_mask = np.ones(canvas, dtype='uint8')
        training_mask_distance = np.ones(canvas, dtype='uint8')

        # Each polygon arrives flat (x0, y0, x1, y1, ...): scale the x/y
        # entries by their factors, then fold into (n_points, 2).
        for idx in range(len(polys)):
            n_points = polys[idx].shape[0] // 2
            scaled = polys[idx] * ([factor_x, factor_y] * n_points)
            polys[idx] = np.reshape(scaled, (n_points, 2)).astype('int32')

        ignored_texts = ('###', '???')
        for idx, poly in enumerate(polys):
            # Filled contour labelled idx + 1 (0 stays background).
            cv2.drawContours(gt_instance, [poly], -1, idx + 1, -1)
            # Every text region starts masked out; kernels are re-enabled
            # further down.
            cv2.drawContours(training_mask, [poly], -1, 0, -1)
            if texts[idx] in ignored_texts:
                cv2.drawContours(training_mask_distance, [poly], -1, 0, -1)

        gt_kernel_instance = np.zeros(canvas, dtype='uint8')
        kernel_polys = self.shrink(polys, self.kernel_scale)
        for idx in range(len(polys)):
            kernel_poly = kernel_polys[idx]
            cv2.drawContours(gt_kernel_instance, [kernel_poly], -1, idx + 1,
                             -1)
            if texts[idx] not in ignored_texts:
                cv2.drawContours(training_mask, [kernel_poly], -1, 1, -1)

        # Binary version of the kernel label map.
        gt_kernel = (gt_kernel_instance > 0).astype('uint8')

        # Difference between one and two 3x3 erosions of the kernel labels.
        structuring = np.ones((3, 3), np.uint8)
        eroded_once = cv2.erode(gt_kernel_instance, structuring, iterations=1)
        eroded_twice = cv2.erode(eroded_once, structuring, iterations=1)
        gt_kernel_inner = eroded_once - eroded_twice

        data['image'] = [
            image, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
            gt_kernel_inner, training_mask_distance
        ]
        return data
|
|
|
|
|
class GroupRandomHorizontalFlip():
    """Mirror every array in ``data['image']`` left-right with probability ``p``.

    All arrays are flipped together (one random draw per call), so the
    image and its label maps stay aligned.
    """

    def __init__(self, p=0.5, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.p = p

    def __call__(self, data):
        """Flip along axis 1 in place when the random draw is below ``p``."""
        maps = data['image']

        if random.random() < self.p:
            # Slice assignment keeps the same container object; ``copy``
            # turns each reversed view into an independent array.
            maps[:] = [np.flip(arr, axis=1).copy() for arr in maps]

        data['image'] = maps
        return data
|
|
|
|
|
class GroupRandomRotate():
    """Rotate every array in ``data['image']`` by one shared random angle.

    Args:
        max_angle: Bound on the rotation angle; the angle is drawn from
            ``[-max_angle, max_angle)``. Defaults to 10, the value that was
            previously hard-coded. It is passed to
            ``cv2.getRotationMatrix2D``, which interprets it in degrees.
        **kwargs: Accepted and ignored.
    """

    def __init__(self, max_angle=10, **kwargs):
        self.max_angle = max_angle

    def __call__(self, data):
        """Rotate all arrays about their centres, keeping their sizes.

        The same angle is applied to every array so the image and its label
        maps stay aligned. Nearest-neighbour sampling is used for all of
        them, which keeps integer label values intact.
        """
        imgs = data['image']

        angle = random.random() * 2 * self.max_angle - self.max_angle
        for idx, img in enumerate(imgs):
            # shape is (rows, cols) == (height, width); OpenCV wants the
            # centre as (x, y) and the output size as (width, height).
            height, width = img.shape[:2]
            rotation_matrix = cv2.getRotationMatrix2D(
                (width / 2, height / 2), angle, 1)
            imgs[idx] = cv2.warpAffine(
                img, rotation_matrix, (width, height),
                flags=cv2.INTER_NEAREST)

        data['image'] = imgs
        return data
|
|
|
|
|
class GroupRandomCropPadding():
    """Crop all arrays in ``data['image']`` to a common window, then pad.

    ``target_size`` is unpacked as ``(width, height)``. The crop window is at
    most that large (smaller when the input is smaller) and the result is
    zero-padded on the bottom/right up to ``target_size``.
    """

    def __init__(self, target_size=(640, 640), **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.target_size = target_size

    def __call__(self, data):
        """Apply one shared random crop + pad to every array.

        ``imgs[0]`` determines the input size and ``imgs[1]`` is used as the
        foreground map that steers the crop position.
        """
        maps = data['image']

        src_h, src_w = maps[0].shape[0:2]
        pad_w, pad_h = self.target_size
        if src_w == pad_w and src_h == pad_h:
            # Already the requested size: nothing to do.
            return data

        # The crop can never be larger than the source.
        crop_h = pad_h if pad_h < src_h else src_h
        crop_w = pad_w if pad_w < src_w else src_w

        if random.random() > 3.0 / 8.0 and np.max(maps[1]) > 0:
            # Derive the range of crop origins from the extent of the
            # non-zero pixels of maps[1].
            foreground = np.where(maps[1] > 0)
            window = (crop_h, crop_w)

            tl = np.min(foreground, axis=1) - window
            tl[tl < 0] = 0
            br = np.max(foreground, axis=1) - window
            br[br < 0] = 0
            br[0] = min(br[0], src_h - crop_h)
            br[1] = min(br[1], src_w - crop_w)

            top = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            left = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            slack_h = src_h - crop_h
            slack_w = src_w - crop_w
            top = random.randint(0, slack_h) if slack_h > 0 else 0
            left = random.randint(0, slack_w) if slack_w > 0 else 0

        pad_bottom = pad_h - crop_h
        pad_right = pad_w - crop_w

        padded = []
        for arr in maps:
            if len(arr.shape) == 3:
                # One zero per channel for multi-channel arrays.
                fill = (0, ) * int(arr.shape[-1])
            else:
                fill = (0, )
            patch = arr[top:top + crop_h, left:left + crop_w]
            padded.append(
                cv2.copyMakeBorder(
                    patch,
                    0,
                    pad_bottom,
                    0,
                    pad_right,
                    borderType=cv2.BORDER_CONSTANT,
                    value=fill))

        data['image'] = padded
        return data
|
|
|
|
|
class MakeCentripetalShift():
    """Compute per-pixel offsets from text pixels to their kernel boundary.

    Consumes the seven-array list stored in ``data['image']`` and replaces
    it with named entries (``image``, ``gt_kernel``, ``training_mask``,
    ``gt_instance``, ``gt_kernel_instance``, ``training_mask_distance``,
    ``gt_distance``).
    """

    def __init__(self, **kwargs):
        # No configuration; keyword arguments are accepted and ignored.
        pass

    def jaccard(self, As, Bs):
        """Return, for each row of ``As``, the index of the nearest row of ``Bs``.

        Distance is Euclidean over the last axis. ``As`` has shape (A, D)
        and ``Bs`` shape (B, D); the result has shape (A,).
        """
        # Broadcasting yields the same (A, B, D) differences as explicitly
        # repeating both operands, without materialising two extra copies.
        diff = As[:, np.newaxis, :] - Bs[np.newaxis, :, :]
        dis = np.sqrt(np.sum(diff**2, axis=-1))
        return np.argmin(dis, axis=-1)

    def __call__(self, data):
        """Build ``gt_distance`` and unpack the label maps into ``data``.

        For every instance label, each pixel that belongs to the instance
        but lies outside any kernel gets the (x, y) offset to its nearest
        ``gt_kernel_inner`` pixel of the same label, scaled by 0.1.
        Instances without such pixels are zeroed in both training masks
        (modified in place).
        """
        imgs = data['image']

        (img, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
         gt_kernel_inner, training_mask_distance) = (
             imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5], imgs[6])

        # Convert to a Python int first: with a uint8 label map, ``max + 1``
        # on the NumPy scalar wraps to 0 at label 255 under NumPy 2
        # promotion rules, which would silently skip every instance.
        max_instance = int(np.max(gt_instance))

        gt_distance = np.zeros((2, *img.shape[0:2]), dtype=np.float32)
        for label in range(1, max_instance + 1):
            inner = (gt_kernel_inner == label)

            if np.sum(inner) == 0:
                # No kernel-boundary pixels to point at: ignore the instance.
                training_mask[gt_instance == label] = 0
                training_mask_distance[gt_instance == label] = 0
                continue

            # np.where yields (rows, cols); reverse to (x, y) order.
            kpoints = np.array(np.where(inner)).T[:, ::-1].astype('float32')

            outside_kernel = (gt_instance == label) * (gt_kernel_instance == 0)
            if np.sum(outside_kernel) == 0:
                continue
            pixels = np.where(outside_kernel)
            points = np.array(pixels).T[:, ::-1].astype('float32')

            nearest = self.jaccard(points, kpoints)
            offset_gt = kpoints[nearest] - points

            # Channel 0 holds the x offset, channel 1 the y offset.
            gt_distance[:, pixels[0], pixels[1]] = offset_gt.T * 0.1

        img = Image.fromarray(img)
        img = img.convert('RGB')

        data["image"] = img
        data["gt_kernel"] = gt_kernel.astype("int64")
        data["training_mask"] = training_mask.astype("int64")
        data["gt_instance"] = gt_instance.astype("int64")
        data["gt_kernel_instance"] = gt_kernel_instance.astype("int64")
        data["training_mask_distance"] = training_mask_distance.astype("int64")
        data["gt_distance"] = gt_distance.astype("float32")

        return data
|
|
|
|
|
class ScaleAlignedShort():
    """Resize ``data['image']`` so its shorter side is about ``short_size``.

    Both sides are rounded up to a multiple of 32 after scaling.
    """

    def __init__(self, short_size=640, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.short_size = short_size

    def __call__(self, data):
        """Resize the image and record the shapes before and after.

        Writes ``data['image']`` (resized) and ``data['shape']``, an array
        holding the original shape followed by the resized shape.
        """
        image = data['image']
        source_shape = image.shape

        src_h, src_w = source_shape[0:2]
        ratio = self.short_size * 1.0 / min(src_h, src_w)

        # Round to nearest, then up to the next multiple of 32.
        dst_h = (int(src_h * ratio + 0.5) + 31) // 32 * 32
        dst_w = (int(src_w * ratio + 0.5) + 31) // 32 * 32

        # cv2.resize takes dsize as (width, height).
        resized = cv2.resize(image, dsize=(dst_w, dst_h))

        # Tuple concatenation: original dims first, resized dims after.
        data['shape'] = np.array(source_shape + resized.shape)
        data['image'] = resized

        return data