# Copyright (C) 2022-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).

# --------------------------------------------------------
# Data augmentation for training stereo and flow
# --------------------------------------------------------

# References
# https://github.com/autonomousvision/unimatch/blob/master/dataloader/stereo/transforms.py
# https://github.com/autonomousvision/unimatch/blob/master/dataloader/flow/transforms.py

import numpy as np
import random
from PIL import Image

import cv2
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)

import torch
from torchvision.transforms import ColorJitter
import torchvision.transforms.functional as FF


class StereoAugmentor(object):

    def __init__(self, crop_size, scale_prob=0.5, scale_xonly=True, lhth=800., lminscale=0.0, lmaxscale=1.0,
                 hminscale=-0.2, hmaxscale=0.4, scale_interp_nearest=True, rightjitterprob=0.5, v_flip_prob=0.5,
                 color_aug_asym=True, color_choice_prob=0.5):
        self.crop_size = crop_size
        self.scale_prob = scale_prob
        self.scale_xonly = scale_xonly
        self.lhth = lhth
        self.lminscale = lminscale
        self.lmaxscale = lmaxscale
        self.hminscale = hminscale
        self.hmaxscale = hmaxscale
        self.scale_interp_nearest = scale_interp_nearest
        self.rightjitterprob = rightjitterprob
        self.v_flip_prob = v_flip_prob
        self.color_aug_asym = color_aug_asym
        self.color_choice_prob = color_choice_prob

    def _random_scale(self, img1, img2, disp):
        ch, cw = self.crop_size
        h, w = img1.shape[:2]
        if self.scale_prob > 0. and np.random.rand() < self.scale_prob:
            # random rescaling: small images (min side below lhth) use the [lminscale, lmaxscale]
            # exponent range, large ones use [hminscale, hmaxscale]
            min_scale, max_scale = (self.lminscale, self.lmaxscale) if min(h, w) < self.lhth else (self.hminscale, self.hmaxscale)
            scale_x = 2. ** np.random.uniform(min_scale, max_scale)
            scale_x = np.clip(scale_x, (cw + 8) / float(w), None)  # keep the image wide enough for the crop
            scale_y = 1.0 if self.scale_xonly else scale_x
            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
            disp = cv2.resize(disp, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR if not self.scale_interp_nearest else cv2.INTER_NEAREST) * scale_x
        else:
            # no random rescaling: only upscale if the image is too narrow for the crop
            clip_scale = (cw + 8) / float(w)
            if clip_scale > 1.:
                scale_x = clip_scale
                scale_y = scale_x if not self.scale_xonly else 1.0
                img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
                img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
                disp = cv2.resize(disp, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR if not self.scale_interp_nearest else cv2.INTER_NEAREST) * scale_x
        return img1, img2, disp

    def _random_crop(self, img1, img2, disp):
        h, w = img1.shape[:2]
        ch, cw = self.crop_size
        assert ch <= h and cw <= w, (img1.shape, h, w, ch, cw)
        offset_x = np.random.randint(w - cw + 1)
        offset_y = np.random.randint(h - ch + 1)
        img1 = img1[offset_y:offset_y + ch, offset_x:offset_x + cw]
        img2 = img2[offset_y:offset_y + ch, offset_x:offset_x + cw]
        disp = disp[offset_y:offset_y + ch, offset_x:offset_x + cw]
        return img1, img2, disp

    def _random_vflip(self, img1, img2, disp):
        # vertical flip
        if self.v_flip_prob > 0 and np.random.rand() < self.v_flip_prob:
            img1 = np.copy(np.flipud(img1))
            img2 = np.copy(np.flipud(img2))
            disp = np.copy(np.flipud(disp))
        return img1, img2, disp

    def _random_rotate_shift_right(self, img2):
        # small random rotation and vertical shift of the right image only,
        # as in the unimatch stereo transforms referenced above
        if self.rightjitterprob > 0. and np.random.rand() < self.rightjitterprob:
            angle, pixel = 0.1, 2
            px = np.random.uniform(-pixel, pixel)
            ag = np.random.uniform(-angle, angle)
            image_center = (np.random.uniform(0, img2.shape[0]), np.random.uniform(0, img2.shape[1]))
            rot_mat = cv2.getRotationMatrix2D(image_center, ag, 1.0)
            img2 = cv2.warpAffine(img2, rot_mat, img2.shape[1::-1], flags=cv2.INTER_LINEAR)
            trans_mat = np.float32([[1, 0, 0], [0, 1, px]])
            img2 = cv2.warpAffine(img2, trans_mat, img2.shape[1::-1], flags=cv2.INTER_LINEAR)
        return img2


class FlowAugmentor(object):

    def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, spatial_aug_prob=0.8, stretch_prob=0.8,
                 max_stretch=0.2, h_flip_prob=0.5, v_flip_prob=0.1, asymmetric_color_aug_prob=0.2):
        # spatial augmentation parameters (defaults follow the RAFT/unimatch flow augmentation referenced above)
        self.crop_size = crop_size
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.spatial_aug_prob = spatial_aug_prob
        self.stretch_prob = stretch_prob
        self.max_stretch = max_stretch
        # flip augmentation parameters
        self.h_flip_prob = h_flip_prob
        self.v_flip_prob = v_flip_prob
        # photometric augmentation parameters
        self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5 / 3.14)
        self.asymmetric_color_aug_prob = asymmetric_color_aug_prob

    def color_transform(self, img1, img2):
        # photometric augmentation: jitter both images jointly, or each one independently
        # with probability asymmetric_color_aug_prob
        if np.random.rand() < self.asymmetric_color_aug_prob:
            img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
            img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
        else:
            image_stack = np.concatenate([img1, img2], axis=0)
            image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
            img1, img2 = np.split(image_stack, 2, axis=0)
        return img1, img2

    def _resize_flow(self, flow, scale_x, scale_y, factor=1.0):
        # 'factor' rescales the flow values for datasets whose ground truth is expressed
        # at factor x the image resolution (e.g. Spring)
        if np.all(np.isfinite(flow)):
            # dense flow: bilinear resize, then rescale the flow vectors
            flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
            flow = flow * [scale_x / factor, scale_y / factor]
        else:
            # sparse flow with invalid pixels marked as inf: reproject the valid vectors onto the rescaled grid
            ht, wd = flow.shape[:2]
            coords = np.meshgrid(np.arange(wd), np.arange(ht))
            coords = np.stack(coords, axis=-1).reshape(-1, 2).astype(np.float32)
            flow = flow.reshape(-1, 2).astype(np.float32)
            valid = np.isfinite(flow[:, 0])
            coords1 = coords[valid] * [scale_x, scale_y]
            flow1 = flow[valid] * [scale_x / factor, scale_y / factor]
            ht1 = int(round(ht * scale_y))
            wd1 = int(round(wd * scale_x))
            xx = np.round(coords1[:, 0]).astype(np.int32)
            yy = np.round(coords1[:, 1]).astype(np.int32)
            v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1)
            xx = xx[v]
            yy = yy[v]
            flow1 = flow1[v]
            flow = np.inf * np.ones([ht1, wd1, 2], dtype=np.float32)  # invalid value everywhere, before we fill it with the correct ones
            flow[yy, xx] = flow1
        return flow

    def spatial_transform(self, img1, img2, flow, dname):
        if np.random.rand() < self.spatial_aug_prob:
            # randomly sample a scale, keeping the image slightly larger than the crop
            ht, wd = img1.shape[:2]
            clip_min_scale = np.maximum((self.crop_size[0] + 8) / float(ht),
                                        (self.crop_size[1] + 8) / float(wd))
            min_scale, max_scale = self.min_scale, self.max_scale
            scale = 2 ** np.random.uniform(min_scale, max_scale)
            scale_x = scale
            scale_y = scale
            if np.random.rand() < self.stretch_prob:
                scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
                scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
            scale_x = np.clip(scale_x, clip_min_scale, None)
            scale_y = np.clip(scale_y, clip_min_scale, None)
            # rescale the images and the flow
            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
            flow = self._resize_flow(flow, scale_x, scale_y, factor=2.0 if dname == 'Spring' else 1.0)
        elif dname == 'Spring':
            flow = self._resize_flow(flow, 1.0, 1.0, factor=2.0)

        if self.h_flip_prob > 0. and np.random.rand() < self.h_flip_prob:  # h-flip
            img1 = img1[:, ::-1]
            img2 = img2[:, ::-1]
            flow = flow[:, ::-1] * [-1.0, 1.0]

        if self.v_flip_prob > 0. and np.random.rand() < self.v_flip_prob:  # v-flip
            img1 = img1[::-1, :]
            img2 = img2[::-1, :]
            flow = flow[::-1, :] * [1.0, -1.0]

        # random crop (offset is 0 when the image already matches the crop size)
        if img1.shape[0] - self.crop_size[0] > 0:
            y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0])
        else:
            y0 = 0
        if img1.shape[1] - self.crop_size[1] > 0:
            x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1])
        else:
            x0 = 0

        img1 = img1[y0:y0 + self.crop_size[0], x0:x0 + self.crop_size[1]]
        img2 = img2[y0:y0 + self.crop_size[0], x0:x0 + self.crop_size[1]]
        flow = flow[y0:y0 + self.crop_size[0], x0:x0 + self.crop_size[1]]

        return img1, img2, flow

    def __call__(self, img1, img2, flow, dname):
        img1, img2, flow = self.spatial_transform(img1, img2, flow, dname)
        img1, img2 = self.color_transform(img1, img2)
        img1 = np.ascontiguousarray(img1)
        img2 = np.ascontiguousarray(img2)
        flow = np.ascontiguousarray(flow)
        return img1, img2, flow
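

# --------------------------------------------------------
# Usage sketch (illustration only, not part of the training pipeline).
# It exercises the augmentors on random dummy arrays; the image resolutions,
# crop sizes and the dataset name 'FlyingChairs' below are arbitrary choices
# for this demo, and FlowAugmentor is instantiated with its default parameters.
# --------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(0)

    # optical-flow augmentation on a dummy image pair with a dense random flow field
    img1 = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    img2 = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    flow = np.random.randn(480, 640, 2).astype(np.float32)
    flow_aug = FlowAugmentor(crop_size=(368, 496))
    img1, img2, flow = flow_aug(img1, img2, flow, dname='FlyingChairs')
    print('flow sample:', img1.shape, img2.shape, flow.shape)  # (368, 496, 3) x2 and (368, 496, 2)

    # stereo augmentation: apply the geometric steps to a dummy rectified left/right pair
    left = np.random.randint(0, 256, (400, 880, 3), dtype=np.uint8)
    right = np.random.randint(0, 256, (400, 880, 3), dtype=np.uint8)
    disp = 64. * np.random.rand(400, 880).astype(np.float32)
    stereo_aug = StereoAugmentor(crop_size=(352, 704))
    left, right, disp = stereo_aug._random_scale(left, right, disp)
    left, right, disp = stereo_aug._random_crop(left, right, disp)
    left, right, disp = stereo_aug._random_vflip(left, right, disp)
    right = stereo_aug._random_rotate_shift_right(right)
    print('stereo sample:', left.shape, right.shape, disp.shape)  # (352, 704, 3) x2 and (352, 704)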