Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# -*- coding:utf-8 -*- | |
# Copyright (c) Megvii, Inc. and its affiliates. | |
""" | |
Data augmentation functionality. Passed as callable transformations to | |
Dataset classes. | |
The data augmentation procedures were interpreted from @weiliu89's SSD paper | |
http://arxiv.org/abs/1512.02325 | |
""" | |
import cv2 | |
import numpy as np | |
import torch | |
from yolox.utils import xyxy2cxcywh | |
import math | |
import random | |
def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    """Randomly jitter hue, saturation and value of a BGR image, in place.

    One gain per channel is drawn uniformly from ``1 +/- gain`` and applied
    through a 256-entry lookup table. Hue wraps modulo 180; saturation and
    value are clipped to [0, 255]. The converted result is written back into
    ``img`` (via ``dst=img``), so nothing is returned.

    Args:
        img: BGR image; its dtype is reused for the lookup tables
            (the original code comments it as uint8).
        hgain: maximum relative change of the hue gain.
        sgain: maximum relative change of the saturation gain.
        vgain: maximum relative change of the value gain.
    """
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    out_dtype = img.dtype

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hue, sat, val = cv2.split(hsv)

    # One lookup table per channel over every possible 8-bit level.
    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    jittered = (
        cv2.LUT(hue, hue_table),
        cv2.LUT(sat, sat_table),
        cv2.LUT(val, val_table),
    )
    img_hsv = cv2.merge(jittered).astype(out_dtype)

    # Writing into ``dst=img`` is what makes this an in-place operation.
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
    """Select the boxes that are still usable after an augmentation.

    Args:
        box1: boxes before augmentation, shape (4, n), rows x1, y1, x2, y2.
        box2: boxes after augmentation, same layout as ``box1``.
        wh_thr: width and height of ``box2`` must both exceed this (pixels).
        ar_thr: aspect ratio (long side / short side) of ``box2`` must stay
            below this.
        area_thr: area of ``box2`` divided by area of ``box1`` must exceed
            this.

    Returns:
        Boolean array of length n, True where every criterion holds.
    """
    eps = 1e-16  # keeps the divisions finite for degenerate boxes

    width_before = box1[2] - box1[0]
    height_before = box1[3] - box1[1]
    width_after = box2[2] - box2[0]
    height_after = box2[3] - box2[1]

    aspect = np.maximum(
        width_after / (height_after + eps), height_after / (width_after + eps)
    )
    area_ratio = width_after * height_after / (width_before * height_before + eps)

    wide_enough = width_after > wh_thr
    tall_enough = height_after > wh_thr
    kept_enough_area = area_ratio > area_thr
    not_too_elongated = aspect < ar_thr

    return wide_enough & tall_enough & kept_enough_area & not_too_elongated
def random_perspective(
    img,
    targets=(),
    degrees=10,
    translate=0.1,
    scale=0.1,
    shear=10,
    perspective=0.0,
    border=(0, 0),
):
    """Apply one random affine warp to an image and to its boxes.

    Centering, rotation+scale, shear and translation are composed into a
    single 3x3 matrix ``M = T @ S @ R @ C``. The image is warped onto a
    canvas of ``img.shape[0] + 2 * border[0]`` by
    ``img.shape[1] + 2 * border[1]`` pixels filled with gray (114), and the
    box corners are mapped through the same matrix.

    Args:
        img: image array indexed (h, w, c).
        targets: array whose first four columns are x1, y1, x2, y2 (the code
            reads the box from columns 0-3); any further columns are carried
            through unchanged. An empty value is returned untouched.
        degrees: rotation angle is drawn from [-degrees, degrees].
        translate: the offset is drawn from
            [0.5 - translate, 0.5 + translate] times the canvas size (the
            image centre is moved to the origin first).
        scale: either a ``(low, high)`` pair the scale factor is drawn from,
            or a single number ``v`` meaning the range ``(1 - v, 1 + v)``.
        shear: x and y shear angles are drawn from [-shear, shear] degrees.
        perspective: only selects ``cv2.warpPerspective`` over
            ``cv2.warpAffine``; ``M`` itself contains no perspective terms.
        border: (rows, cols) added on each side of the output canvas.

    Returns:
        ``(img, targets)``: the warped image and the surviving, transformed
        targets.
    """
    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center: move the image centre to the origin.
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    if np.isscalar(scale):
        # The scalar default (0.1) used to crash on ``scale[0]``; treat a
        # single number as a symmetric range around 1.
        s = random.uniform(1 - scale, 1 + scale)
    else:
        s = random.uniform(scale[0], scale[1])
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * width
    )  # x translation (pixels)
    T[1, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * height
    )  # y translation (pixels)

    # Combined matrix; the order of operations (right to left) is IMPORTANT.
    M = T @ S @ R @ C

    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # Transform label coordinates
    n = len(targets)
    if n:
        # Warp all four corners of every box as homogeneous points.
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
            n * 4, 2
        )  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # New axis-aligned boxes enclosing the warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Boxes are not clipped to the canvas here; degenerate ones are
        # filtered against the scaled originals instead.
        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[i]  # boolean indexing copies, the input is untouched
        targets[:, :4] = xy[i]

        # Drop boxes lying entirely outside the output canvas.
        targets = targets[targets[:, 0] < width]
        targets = targets[targets[:, 2] > 0]
        targets = targets[targets[:, 1] < height]
        targets = targets[targets[:, 3] > 0]

    return img, targets
def _distort(image):
    """Return a randomly photometrically distorted copy of a BGR image.

    Four independent coin flips decide whether to apply, in this order: a
    brightness offset, a contrast gain, a hue shift (modulo 180) and a
    saturation gain. The last two are done in HSV space. The input array is
    not modified.
    """

    def _rescale(arr, alpha=1, beta=0):
        # Linear transform saturated to [0, 255], written back into ``arr``.
        arr[:] = np.clip(arr.astype(float) * alpha + beta, 0, 255)

    distorted = image.copy()

    if random.randrange(2):
        _rescale(distorted, beta=random.uniform(-32, 32))

    if random.randrange(2):
        _rescale(distorted, alpha=random.uniform(0.5, 1.5))

    hsv = cv2.cvtColor(distorted, cv2.COLOR_BGR2HSV)

    if random.randrange(2):
        shifted_hue = hsv[:, :, 0].astype(int) + random.randint(-18, 18)
        hsv[:, :, 0] = shifted_hue % 180

    if random.randrange(2):
        # The slice is a view, so the saturation channel is updated in place.
        _rescale(hsv[:, :, 1], alpha=random.uniform(0.5, 1.5))

    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
def _mirror(image, boxes):
    """Flip an image and its boxes horizontally with probability 1/2.

    Args:
        image: three-axis array; the second axis is taken as the width.
        boxes: array whose columns 0 and 2 hold x1 and x2.

    Returns:
        ``(image, boxes)``: either the inputs themselves (no flip), or a
        reversed view of the image plus a copy of the boxes with mirrored
        x coordinates.
    """
    _, img_width, _ = image.shape

    if not random.randrange(2):
        return image, boxes

    flipped_boxes = boxes.copy()
    # New x1 = width - old x2 and new x2 = width - old x1.
    flipped_boxes[:, 0::2] = img_width - flipped_boxes[:, 2::-2]
    return image[:, ::-1], flipped_boxes
def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
    """Letterbox an image onto a gray canvas and normalise it.

    The image is resized with its aspect ratio kept so that it fits inside
    ``input_size``, pasted into the top-left corner of a canvas filled with
    114, channel-reversed along the last axis, divided by 255, optionally
    mean/std normalised, and finally transposed with ``swap``.

    Args:
        image: input image (anything ``np.array`` accepts).
        input_size: (height, width) of the output canvas.
        mean: per-channel mean subtracted after the channel reversal, or
            None to skip.
        std: per-channel divisor applied after the mean, or None to skip.
        swap: axis order handed to ``transpose``.

    Returns:
        ``(padded_img, r)``: the contiguous float32 result and the resize
        ratio that was applied.

    NOTE(review): the non-3-axis branch allocates a canvas of shape
    ``input_size``, but the later ``[:, :, ::-1]`` indexing needs three
    axes -- confirm whether grayscale input is really supported.
    """
    if len(image.shape) == 3:
        canvas = np.full((input_size[0], input_size[1], 3), 114.0)
    else:
        canvas = np.full(input_size, 114.0)

    img = np.array(image)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * r)
    new_w = int(img.shape[1] * r)

    resized_img = cv2.resize(
        img, (new_w, new_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.float32)
    canvas[:new_h, :new_w] = resized_img

    # Reverse the channel axis (presumably BGR -> RGB), then scale to [0, 1].
    canvas = canvas[:, :, ::-1]
    canvas /= 255.0
    if mean is not None:
        canvas -= mean
    if std is not None:
        canvas /= std

    padded_img = np.ascontiguousarray(canvas.transpose(swap), dtype=np.float32)
    return padded_img, r
class TrainTransform:
    """Training-time transform: distort, mirror, letterbox and pad labels.

    Args:
        p: stored on the instance; ``__call__`` does not read it.
        rgb_means: per-channel mean forwarded to ``preproc`` (None to skip).
        std: per-channel std forwarded to ``preproc`` (None to skip).
        max_labels: number of rows in the fixed-size label array returned.
    """

    def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100):
        self.means = rgb_means
        self.std = std
        self.p = p
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        """Augment one sample.

        Args:
            image: input image, forwarded to ``_distort`` / ``preproc``.
            targets: array with at least six columns; columns 0-3 are the
                box (handed to ``xyxy2cxcywh``), column 4 the label and
                column 5 the id.
            input_dim: output size forwarded to ``preproc``.

        Returns:
            ``(image, labels)``: the preprocessed float32 image and a
            ``(max_labels, 6)`` float32 array whose rows are
            ``[label, box (4 values from xyxy2cxcywh), id]``, zero padded.
        """
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()

        if len(boxes) == 0:
            # Nothing to augment: preprocess only and return all-zero labels.
            targets = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, _ = preproc(image, input_dim, self.means, self.std)
            image = np.ascontiguousarray(image, dtype=np.float32)
            return image, targets

        # ``_distort`` works on its own copy, so ``image`` stays pristine for
        # the fallback below.
        image_t = _distort(image)
        image_t, boxes = _mirror(image_t, boxes)
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)

        # boxes [xyxy] -> [cx, cy, w, h], then into the resized frame.
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # Keep boxes whose shorter side is larger than one pixel.
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]
        ids_t = ids[mask_b]

        if len(boxes_t) == 0:
            # Every box was filtered out: fall back to the un-augmented
            # sample. This is built only now, so the common path avoids an
            # extra full image copy.
            image_t, r_o = preproc(image, input_dim, self.means, self.std)
            boxes_t = xyxy2cxcywh(targets[:, :4].copy())
            boxes_t *= r_o
            labels_t = labels
            ids_t = ids

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)
        targets_t = np.hstack((labels_t, boxes_t, ids_t))

        # Copy at most ``max_labels`` rows into the fixed-size output.
        padded_labels = np.zeros((self.max_labels, 6))
        n_kept = min(len(targets_t), self.max_labels)
        padded_labels[:n_kept] = targets_t[:n_kept]

        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
class ValTransform:
    """Validation/test-time transform: letterbox and normalise, no augmentation.

    Arguments:
        rgb_means ((float,float,float)): per-channel mean forwarded to
            ``preproc``, or None to skip
        std ((float,float,float)): per-channel std forwarded to ``preproc``,
            or None to skip
        swap ((int,int,int)): final order of the image axes

    Calling the instance returns the preprocessed image together with a
    ``(1, 5)`` array of zeros in place of labels.
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)):
        self.means, self.swap, self.std = rgb_means, swap, std

    # assume input is cv2 img for now
    def __call__(self, img, res, input_size):
        # ``res`` is accepted for interface compatibility and is not read.
        processed, _ratio = preproc(
            img, input_size, self.means, self.std, self.swap
        )
        placeholder = np.zeros((1, 5))
        return processed, placeholder