# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. import random import cv2 import numpy as np from PIL import Image from shapely import affinity from shapely.geometry import Polygon from torchvision.transforms import functional as F class Compose(object): def __init__(self, transforms): self.transforms = transforms def __call__(self, image, target): for t in self.transforms: image, target = t(image, target) return image, target def __repr__(self): format_string = self.__class__.__name__ + "(" for t in self.transforms: format_string += "\n" format_string += " {0}".format(t) format_string += "\n)" return format_string class Resize(object): def __init__(self, min_size, max_size, strict_resize): self.min_size = min_size self.max_size = max_size self.strict_resize = strict_resize # modified from torchvision to add support for max size def get_size(self, image_size): w, h = image_size if isinstance(self.min_size, tuple): if len(self.min_size) == 1: size = self.min_size[0] else: random_size_index = random.randint(0, len(self.min_size) - 1) size = self.min_size[random_size_index] else: size = self.min_size max_size = self.max_size if max_size is not None: min_original_size = float(min((w, h))) max_original_size = float(max((w, h))) if max_original_size / min_original_size * size > max_size: size = int(round(max_size * min_original_size / max_original_size)) if (w <= h and w == size) or (h <= w and h == size): if self.strict_resize: h = h if h % 32 == 0 else (h // 32) * 32 w = w if w % 32 == 0 else (w // 32) * 32 return (h, w) if w < h: ow = size oh = int(size * h / w) else: oh = size ow = int(size * w / h) if self.strict_resize: oh = oh if oh % 32 == 0 else (oh // 32) * 32 ow = ow if ow % 32 == 0 else (ow // 32) * 32 return (oh, ow) def __call__(self, image, target): size = self.get_size(image.size) image = F.resize(image, size) if target is not None: target = target.resize(image.size) return image, target class RandomCrop(object): def __init__(self, prob, crop_min_size=500, crop_max_size=1000, max_trys=50): self.min_size = crop_min_size self.max_size = crop_max_size self.max_trys = max_trys self.prob = prob def __call__(self, image, target): if random.random() < self.prob: im = np.array(image) w, h = image.size h_array = np.zeros((h), dtype=np.int32) w_array = np.zeros((w), dtype=np.int32) boxes = target.bbox.numpy() if len(boxes) == 0: return image, target for box in boxes: box = np.round(box, decimals=0).astype(np.int32) minx = box[0] maxx = box[2] w_array[minx:maxx] = 1 miny = box[1] maxy = box[3] h_array[miny:maxy] = 1 h_axis = np.where(h_array == 0)[0] w_axis = np.where(w_array == 0)[0] if len(h_axis) == 0 or len(w_axis) == 0: return image, target for _ in range(self.max_trys): xx = np.random.choice(w_axis, size=2) xmin = min(xx) xmax = max(xx) x_size = xmax - xmin if x_size > self.max_size or x_size < self.min_size: continue yy = np.random.choice(h_axis, size=2) ymin = min(yy) ymax = max(yy) y_size = ymax - ymin if y_size > self.max_size or y_size < self.min_size: continue box_in_area = ( (boxes[:, 0] >= xmin) & (boxes[:, 1] >= ymin) & (boxes[:, 2] <= xmax) & (boxes[:, 3] <= ymax) ) if len(np.where(box_in_area)[0]) == 0: continue im = im[ymin:ymax, xmin:xmax] target = target.crop([xmin, ymin, xmax, ymax]) return Image.fromarray(im), target return image, target else: return image, target # class RandomCropFixSize(object): # def __init__(self, prob, crop_size=512, max_trys=50): # self.crop_size = crop_size # self.max_trys = max_trys # self.prob = prob # def __call__(self, image, target): # if random.random() < self.prob: # im = np.array(image) # w, h = image.size # h_array = np.zeros((h), dtype=np.int32) # w_array = np.zeros((w), dtype=np.int32) # boxes = target.bbox.numpy() # if len(boxes) == 0: # return image, target # for box in boxes: # box = np.round(box, decimals=0).astype(np.int32) # minx = box[0] # maxx = box[2] # w_array[minx:maxx] = 1 # miny = box[1] # maxy = box[3] # h_array[miny:maxy] = 1 # h_axis = np.where(h_array == 0)[0] # w_axis = np.where(w_array == 0)[0] # if len(h_axis) == 0 or len(w_axis) == 0: # return image, target # for _ in range(self.max_trys): # xx = np.random.choice(w_axis, size=2) # xmin = min(xx) # xmax = max(xx) # x_size = xmax - xmin # if x_size > self.max_size or x_size < self.min_size: # continue # yy = np.random.choice(h_axis, size=2) # ymin = min(yy) # ymax = max(yy) # y_size = ymax - ymin # if y_size > self.max_size or y_size < self.min_size: # continue # box_in_area = ( # (boxes[:, 0] >= xmin) # & (boxes[:, 1] >= ymin) # & (boxes[:, 2] <= xmax) # & (boxes[:, 3] <= ymax) # ) # if len(np.where(box_in_area)[0]) == 0: # continue # im = im[ymin:ymax, xmin:xmax] # target = target.crop([xmin, ymin, xmax, ymax]) # return Image.fromarray(im), target # return image, target # else: # return image, target class RandomHorizontalFlip(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: image = F.hflip(image) target = target.transpose(0) return image, target class ToTensor(object): def __call__(self, image, target): return F.to_tensor(image), target class Normalize(object): def __init__(self, mean, std, to_bgr255=True): self.mean = mean self.std = std self.to_bgr255 = to_bgr255 def __call__(self, image, target): if self.to_bgr255: image = image[[2, 1, 0]] * 255 image = F.normalize(image, mean=self.mean, std=self.std) return image, target class RandomBrightness(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: brightness_factor = random.uniform(0.5, 2) image = F.adjust_brightness(image, brightness_factor) return image, target class RandomContrast(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: contrast_factor = random.uniform(0.5, 2) image = F.adjust_contrast(image, contrast_factor) return image, target class RandomHue(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: hue_factor = random.uniform(-0.25, 0.25) image = F.adjust_hue(image, hue_factor) return image, target class RandomSaturation(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: saturation_factor = random.uniform(0.5, 2) image = F.adjust_saturation(image, saturation_factor) return image, target class RandomGamma(object): def __init__(self, prob=0.5): self.prob = prob def __call__(self, image, target): if random.random() < self.prob: gamma_factor = random.uniform(0.5, 2) image = F.adjust_gamma(image, gamma_factor) return image, target class RandomRotate(object): def __init__(self, prob, max_theta=30, fix_rotate=False): self.prob = prob self.max_theta = max_theta self.fix_rotate = fix_rotate def __call__(self, image, target): if random.random() < self.prob and target is not None: # try: if self.fix_rotate: delta = 30 else: delta = random.uniform(-1 * self.max_theta, self.max_theta) width, height = image.size ## get the minimal rect to cover the rotated image img_box = [[[0, 0], [width, 0], [width, height], [0, height]]] rotated_img_box = _quad2minrect( _rotate_polygons(img_box, delta, (width / 2, height / 2)) ) r_height = int( max(rotated_img_box[0][3], rotated_img_box[0][1]) - min(rotated_img_box[0][3], rotated_img_box[0][1]) ) r_width = int( max(rotated_img_box[0][2], rotated_img_box[0][0]) - min(rotated_img_box[0][2], rotated_img_box[0][0]) ) r_height = max(r_height, height + 1) r_width = max(r_width, width + 1) ## padding im im_padding = np.zeros((r_height, r_width, 3)) start_h, start_w = ( int((r_height - height) / 2.0), int((r_width - width) / 2.0), ) end_h, end_w = start_h + height, start_w + width im_padding[start_h:end_h, start_w:end_w, :] = image M = cv2.getRotationMatrix2D((r_width / 2, r_height / 2), delta, 1) im = cv2.warpAffine(im_padding, M, (r_width, r_height)) im = Image.fromarray(im.astype(np.uint8)) target = target.rotate( -delta, (r_width / 2, r_height / 2), start_h, start_w ) return im, target # except: # return image, target else: return image, target def _quad2minrect(boxes): ## trans a quad(N*4) to a rectangle(N*4) which has miniual area to cover it return np.hstack( ( boxes[:, ::2].min(axis=1).reshape((-1, 1)), boxes[:, 1::2].min(axis=1).reshape((-1, 1)), boxes[:, ::2].max(axis=1).reshape((-1, 1)), boxes[:, 1::2].max(axis=1).reshape((-1, 1)), ) ) def _boxlist2quads(boxlist): res = np.zeros((len(boxlist), 8)) for i, box in enumerate(boxlist): # print(box) res[i] = np.array( [ box[0][0], box[0][1], box[1][0], box[1][1], box[2][0], box[2][1], box[3][0], box[3][1], ] ) return res def _rotate_polygons(polygons, angle, r_c): ## polygons: N*8 ## r_x: rotate center x ## r_y: rotate center y ## angle: -15~15 rotate_boxes_list = [] for poly in polygons: box = Polygon(poly) rbox = affinity.rotate(box, angle, r_c) if len(list(rbox.exterior.coords)) < 5: print("img_box_ori:", poly) print("img_box_rotated:", rbox) # assert(len(list(rbox.exterior.coords))>=5) rotate_boxes_list.append(rbox.boundary.coords[:-1]) res = _boxlist2quads(rotate_boxes_list) return res