|
|
|
|
|
import random |
|
import numpy as np |
|
import random |
|
import cv2 |
|
from typing import List |
|
from PIL import Image |
|
|
|
from dynamic_utils import (extend_key_frame_to_all, |
|
sample_key_frames) |
|
import imutils |
|
import math |
|
from scipy.ndimage import gaussian_filter1d |
|
from glob import glob |
|
|
|
|
|
class RandomRegionSampler(object):
    """Randomly sample rectangular regions (RoIs) inside an image.

    Candidate (width, height) anchors are built from every (scale, ratio)
    combination; sampling picks anchors at random, jitters them, and either
    returns only the shapes or places them uniformly inside the image.
    """

    def __init__(self,
                 num_rois: int,
                 scales: tuple,
                 ratios: tuple,
                 scale_jitter: float):
        """ Randomly sample several RoIs

        Args:
            num_rois (int): number of sampled RoIs per image
            scales (tuple): scales of candidate bounding boxes
            ratios (tuple): aspect ratios of candidate bounding boxes
            scale_jitter (float): scale jitter factor, positive number
        """
        self.num_rois = num_rois
        self.scale_jitter = scale_jitter

        # Anchor table: every (scale, ratio) pair yields one (w, h) with
        # w * h == scale ** 2 and w / h == ratio.
        scales = np.array(scales, np.float32)
        ratios = np.array(ratios, np.float32)
        widths = scales.reshape(1, -1) * np.sqrt(ratios).reshape(-1, 1)
        heights = scales.reshape(1, -1) / np.sqrt(ratios).reshape(-1, 1)
        self.anchors = np.concatenate((widths.reshape(-1, 1),
                                       heights.reshape(-1, 1)), axis=-1)

    def _sample_shapes(self, img_h: int, img_w: int) -> np.ndarray:
        """Sample ``num_rois`` jittered (w, h) shapes clipped to the image.

        Shared by :meth:`sample` and :meth:`sample_box_shapes` (the original
        code duplicated this logic in both methods).

        Args:
            img_h (int): image height
            img_w (int): image width

        Returns:
            np.ndarray: box shapes in shape [num_rois, 2], (w, h), with
                w in [1, img_w - 1] and h in [1, img_h - 1].
        """
        anchor_inds = np.random.randint(0, len(self.anchors),
                                        size=(self.num_rois, ))
        box_shapes = self.anchors[anchor_inds].copy()
        if self.scale_jitter is not None:
            # Log-uniform jitter keeps scaling up/down symmetric.
            scale_factors = np.random.uniform(-self.scale_jitter,
                                              self.scale_jitter,
                                              size=(self.num_rois, 2))
            box_shapes = box_shapes * np.exp(scale_factors)
        box_shapes[:, 0] = np.clip(box_shapes[:, 0], 1, img_w - 1)
        box_shapes[:, 1] = np.clip(box_shapes[:, 1], 1, img_h - 1)
        return box_shapes

    def sample(self, data: List[np.ndarray]) -> np.ndarray:
        """ Sample boxes.

        Args:
            data (list): image list, each element is a numpy.ndarray
                in shape of [H, W, 3]

        Returns:
            boxes (np.ndarray): the sampled bounding boxes. in shape of
                [self.num_rois, 4], represented in (x1, y1, x2, y2).
        """
        h, w = data[0].shape[0:2]
        box_shapes = self._sample_shapes(h, w)

        # Place the top-left corner uniformly so the box stays inside.
        x1 = np.random.uniform(0, w - box_shapes[:, 0])
        y1 = np.random.uniform(0, h - box_shapes[:, 1])

        boxes = np.concatenate((x1.reshape(-1, 1),
                                y1.reshape(-1, 1),
                                (x1 + box_shapes[:, 0]).reshape(-1, 1),
                                (y1 + box_shapes[:, 1]).reshape(-1, 1)),
                               axis=1)
        return boxes

    def sample_box_shapes(self, data: List[np.ndarray]) -> np.ndarray:
        """ Sample box shapes only (no positions).

        Args:
            data (list): image list, each element is a numpy.ndarray
                in shape of [H, W, 3]

        Returns:
            box_shapes (np.ndarray): the sampled box shapes, in shape of
                [self.num_rois, 2], represented as (w, h).
        """
        h, w = data[0].shape[0:2]
        return self._sample_shapes(h, w)
|
|
|
|
|
class PatchMask(object): |
|
|
|
def __init__(self, |
|
use_objects: bool, |
|
objects_path: str, |
|
region_sampler: dict, |
|
key_frame_probs: list, |
|
loc_velocity: float, |
|
rot_velocity: float, |
|
size_velocity: float, |
|
label_prob: float, |
|
patch_transformation: str, |
|
motion_type: str): |
|
|
|
""" Core transformation in Catch-the-Patch. |
|
|
|
Args: |
|
region_sampler (dict): region sampler setting, it will be used to |
|
construct a RandomRegionSampler object. |
|
key_frame_probs (list): probabilities of sampling how many key |
|
frames. The sum of this list should be 1. |
|
loc_velocity (float): the maximum patch movement speed. (pix per |
|
frame). |
|
size_velocity (float): the maximum size change ratios between two |
|
neighbouring frames. |
|
label_prob (float): how many percentages of frames will be |
|
modified. Note that even the frame is not modified, we still |
|
force the model to infer the patch positions. (see MRM module |
|
in the paper). |
|
""" |
|
self.region_sampler = RandomRegionSampler(**region_sampler) |
|
self.key_frame_probs = key_frame_probs |
|
self.loc_velocity = loc_velocity |
|
self.rot_velocity = rot_velocity |
|
self.size_velocity = size_velocity |
|
self.label_prob = label_prob |
|
if motion_type is not None: |
|
self.motion_type = motion_type |
|
self.patch_transformation = patch_transformation |
|
self.use_objects = use_objects |
|
|
|
if self.use_objects: |
|
|
|
self.object_list = glob(objects_path+"/*/*") |
|
|
|
|
|
print(self.object_list[0:10],len(self.object_list)) |
|
|
|
def paste_objects(self, data, traj_rois, boxes): |
|
|
|
objects_list = [] |
|
label_list = [] |
|
|
|
for i in range(len(boxes)): |
|
objects, crop_index = self.pick_objects(data, traj_rois[i]) |
|
labels = np.random.uniform(0, 1, size=(len(data), )) |
|
labels[crop_index] = 0.0 |
|
labels[0] = 0.0 |
|
labels = labels <= self.label_prob |
|
objects_list.append(objects) |
|
label_list.append(labels) |
|
|
|
return objects_list, None, label_list |
|
|
|
def paste_patches(self, data, traj_rois, boxes): |
|
|
|
patches_list = [] |
|
alphas_list = [] |
|
label_list = [] |
|
|
|
for i in range(len(boxes)): |
|
patches, crop_index = self.pick_patches(data, traj_rois[i]) |
|
alphas = self.pick_alphas(data, traj_rois[i], crop_index) |
|
labels = np.random.uniform(0, 1, size=(len(data), )) |
|
labels[crop_index] = 0.0 |
|
labels[0] = 0.0 |
|
labels = labels <= self.label_prob |
|
patches_list.append(patches) |
|
alphas_list.append(alphas) |
|
label_list.append(labels) |
|
|
|
return patches_list, alphas_list, label_list |
|
|
|
|
|
|
|
|
|
|
|
def pick_patches(self, |
|
data: List[np.ndarray], |
|
traj_rois: np.ndarray) -> tuple: |
|
""" Pick image patches from the raw video frame. |
|
|
|
We just randomly select a frame index, and crop the frame according to |
|
the trajectory rois. This cropped patch will be resized into the |
|
suitable size specified by the traj_rois. |
|
|
|
Args: |
|
data (List[np.ndarray]): list of images, each element is in shape |
|
of [H, W, 3] |
|
traj_rois (np.ndarray): the generated trajectories, in shape of |
|
[N_frames, 4]. (x1, y1, x2, y2) |
|
|
|
Returns: |
|
patches (List[np.ndarray]): the cropped patches |
|
select_idx (int): the frame index which the source patch |
|
cropped from. |
|
""" |
|
traj_sizes = traj_rois[..., 2:4] - traj_rois[..., 0:2] |
|
num = len(traj_sizes) |
|
select_idx = random.randint(0, num - 1) |
|
x1, y1, x2, y2 = traj_rois[select_idx] |
|
traj_rois_H = y2 - y1 |
|
traj_rois_W = x2 - x1 |
|
|
|
img = data[select_idx] |
|
img_H, img_W, _ = img.shape |
|
|
|
if img_W - traj_rois_W - 1 >= 0 and img_H - traj_rois_H - 1 >= 0: |
|
new_x1 = random.randint(0, img_W - traj_rois_W - 1) |
|
new_y1 = random.randint(0, img_H - traj_rois_H - 1) |
|
new_x2 = new_x1 + traj_rois_W |
|
new_y2 = new_y1 + traj_rois_H |
|
img = img[new_y1:new_y2, new_x1:new_x2, :] |
|
else: |
|
img = img |
|
patches = [cv2.resize(img, (traj_sizes[i, 0], traj_sizes[i, 1])) |
|
for i in range(traj_rois.shape[0])] |
|
return patches, select_idx |
|
|
|
def pick_objects(self, |
|
data: List[np.ndarray], |
|
traj_rois: np.ndarray) -> tuple: |
|
""" Pick image patches from the raw video frame. |
|
|
|
We just randomly select a frame index, and crop the frame according to |
|
the trajectory rois. This cropped patch will be resized into the |
|
suitable size specified by the traj_rois. |
|
|
|
Args: |
|
data (List[np.ndarray]): list of images, each element is in shape |
|
of [H, W, 3] |
|
traj_rois (np.ndarray): the generated trajectories, in shape of |
|
[N_frames, 4]. (x1, y1, x2, y2) |
|
|
|
Returns: |
|
patches (List[np.ndarray]): the cropped patches |
|
select_idx (int): the frame index which the source patch |
|
cropped from. |
|
""" |
|
traj_sizes = traj_rois[..., 2:4] - traj_rois[..., 0:2] |
|
num = len(traj_sizes) |
|
select_idx = random.randint(0, num - 1) |
|
|
|
x1, y1, x2, y2 = traj_rois[select_idx] |
|
|
|
|
|
object_ind = random.randint(0, len(self.object_list)- 1) |
|
object_img = Image.open(self.object_list[object_ind]) |
|
object_img = object_img.resize((x2-x1,y2-y1)) |
|
|
|
objects = [object_img.resize((traj_sizes[i, 0], traj_sizes[i, 1])) |
|
for i in range(traj_rois.shape[0])] |
|
|
|
return objects, select_idx |
|
|
|
|
|
|
|
    def pick_alphas(self,
                    data,
                    traj_rois: np.ndarray,
                    crop_index: int):
        """ Generate the alpha masks for merging the patches into the raw
        frames:
            out_frame = raw_frame * (1 - alpha) + patch * alpha.
        Despite the transparency, the alpha values are also used to mask the
        patches into some predefined shapes, like ellipse or rhombus.
        There are many strange constants in this function. But we do not
        conduct any ablation analysis on these constants. They should have
        little impact to the final performances.

        Args:
            data (List[np.ndarray]): list of images, each element is in shape
                of [H, W, 3]
            traj_rois (np.ndarray): the generated trajectories, in shape of
                [N_frames, 4]. (x1, y1, x2, y2)
            crop_index (int): the frame index which the source patch
                cropped from.

        Returns:
            alphas (List[np.ndarray]): the generated alpha values

        """
        traj_sizes = traj_rois[..., 2:4] - traj_rois[..., 0:2]
        num_frames = traj_sizes.shape[0]

        # Build the base mask at the source (crop) frame's RoI size; it is
        # resized to every other frame's RoI size at the end.
        base_w, base_h = traj_sizes[crop_index]

        base_x_grids, base_y_grids = np.meshgrid(
            np.arange(base_w).astype(np.float32),
            np.arange(base_h).astype(np.float32)
        )
        ctr_w = (base_w - 1) // 2
        ctr_h = (base_h - 1) // 2

        # Per-axis distance to the patch center, normalized by the box size
        # so the box edge sits at distance ~0.5.
        dist_to_ctr_x = np.abs(base_x_grids - ctr_w) / base_w
        dist_to_ctr_y = np.abs(base_y_grids - ctr_h) / base_h

        # Mask shape: 0 = full rectangle, 1 = ellipse, 2 = rhombus.
        mask_type = int(np.random.choice(3, p=[0.5, 0.35, 0.15]))
        if mask_type == 0:
            # Chebyshev distance; the mask covers the whole box.
            dist_to_ctr = np.maximum(dist_to_ctr_x, dist_to_ctr_y)
            base_alpha = np.ones((base_h, base_w), np.float32)
        elif mask_type == 1:
            # Euclidean distance -> elliptic mask.
            dist_to_ctr = np.sqrt(dist_to_ctr_x ** 2 + dist_to_ctr_y ** 2)
            base_alpha = np.where(dist_to_ctr < 0.5,
                                  np.ones((base_h, base_w), np.float32),
                                  np.zeros((base_h, base_w), np.float32))
        elif mask_type == 2:
            # Manhattan distance -> rhombus mask.
            dist_to_ctr = (dist_to_ctr_x + dist_to_ctr_y)
            base_alpha = np.where(dist_to_ctr < 0.5,
                                  np.ones((base_h, base_w), np.float32),
                                  np.zeros((base_h, base_w), np.float32))
        else:
            raise NotImplementedError

        # Optionally fade the edge: alpha ramps linearly from 1 at distance
        # `turning_point` down to 0 at distance 0.5.
        use_smooth_edge = random.uniform(0, 1) < 0.5
        if use_smooth_edge:
            turning_point = random.uniform(0.30, 0.45)
            k = -1 / (0.5 - turning_point)
            alpha_mul = k * dist_to_ctr - 0.5 * k
            alpha_mul = np.clip(alpha_mul, 0, 1)
            base_alpha = base_alpha * alpha_mul

        # Global per-frame transparency in [0.8, 1.0], sampled at key frames
        # and expanded to all frames by extend_key_frame_to_all.
        key_inds = sample_key_frames(num_frames, self.key_frame_probs)
        frame_alphas = np.random.uniform(0.8, 1.0, size=(len(key_inds), 1))
        frame_alphas = extend_key_frame_to_all(frame_alphas, key_inds)

        alphas = []
        for frame_idx in range(num_frames):
            w, h = traj_sizes[frame_idx]
            i_alpha = cv2.resize(base_alpha, (w, h))
            i_alpha = i_alpha * frame_alphas[frame_idx]
            alphas.append(i_alpha)
        return alphas
|
|
|
def get_rotation_angles(self, |
|
num_frames, |
|
transform_param: dict): |
|
key_frame_probs = transform_param['key_frame_probs'] |
|
loc_key_inds = sample_key_frames(num_frames, key_frame_probs) |
|
|
|
rot_velocity = transform_param['rot_velocity'] |
|
rot_angles = np.zeros((transform_param['traj_rois'].shape[0],1)) |
|
|
|
|
|
rot_angles_list= [np.expand_dims(rot_angles, axis=0)] |
|
for i in range(len(loc_key_inds) - 1): |
|
if rot_velocity > 0: |
|
index_diff = loc_key_inds[i + 1] - loc_key_inds[i] |
|
shifts = np.random.uniform(low=-rot_velocity* index_diff, |
|
high=rot_velocity* index_diff, |
|
size=rot_angles.shape) |
|
rot_angles = rot_angles + shifts |
|
rot_angles_list.append(np.expand_dims(rot_angles, axis=0)) |
|
rot_angles = np.concatenate(rot_angles_list, axis=0) |
|
rot_angles = extend_key_frame_to_all(rot_angles, loc_key_inds, 'random') |
|
rot_angles = rot_angles.transpose((1, 0, 2)) |
|
|
|
|
|
return rot_angles |
|
|
|
def get_shear_factors(self, |
|
num_frames, |
|
transform_param: dict): |
|
key_frame_probs = transform_param['key_frame_probs'] |
|
loc_key_inds = sample_key_frames(num_frames, key_frame_probs) |
|
|
|
|
|
|
|
rot_velocity = transform_param['rot_velocity'] |
|
rot_angles = np.zeros((transform_param['traj_rois'].shape[0],1)) |
|
|
|
|
|
rot_angles_list= [np.expand_dims(rot_angles, axis=0)] |
|
for i in range(len(loc_key_inds) - 1): |
|
if rot_velocity > 0: |
|
index_diff = loc_key_inds[i + 1] - loc_key_inds[i] |
|
shifts = np.random.uniform(low=-rot_velocity* index_diff, |
|
high=rot_velocity* index_diff, |
|
size=rot_angles.shape) |
|
|
|
|
|
|
|
rot_angles = rot_angles + shifts |
|
rot_angles_list.append(np.expand_dims(rot_angles, axis=0)) |
|
rot_angles = np.concatenate(rot_angles_list, axis=0) |
|
rot_angles = extend_key_frame_to_all(rot_angles, loc_key_inds, 'random') |
|
rot_angles = rot_angles.transpose((1, 0, 2)) |
|
|
|
return rot_angles |
|
|
|
|
|
def _apply_image(self, |
|
data: List[np.ndarray], |
|
transform_param: dict): |
|
|
|
data_1 = data |
|
|
|
|
|
|
|
|
|
sizes = transform_param['traj_rois'][..., 2:4] - \ |
|
transform_param['traj_rois'][..., 0:2] |
|
avg_sizes = np.prod(np.mean(sizes, axis=1), axis=1) |
|
arg_rank = np.argsort(avg_sizes)[::-1] |
|
|
|
width, height,_ = data_1[0].shape |
|
|
|
|
|
|
|
if self.use_objects: |
|
|
|
if transform_param['patch_transformation'] == 'rotation': |
|
rot_angles = self.get_rotation_angles(len(data_1),transform_param) |
|
transformed_data_1 = [] |
|
for frame_idx in range(len(data_1)): |
|
i_rois = transform_param['traj_rois'][:, frame_idx, :] |
|
img = data_1[frame_idx].copy() |
|
for patch_idx in arg_rank: |
|
if not transform_param['traj_labels'][patch_idx][frame_idx]: |
|
continue |
|
i_object = transform_param['patches'][patch_idx][frame_idx] |
|
i_object = np.array(i_object) |
|
angle = int(rot_angles[patch_idx][frame_idx]) |
|
rotated_i_object = imutils.rotate_bound(i_object, angle) |
|
|
|
rotated_i_alpha = rotated_i_object[..., -1] |
|
rotated_i_alpha = rotated_i_alpha / 255.0 |
|
rotated_i_object = rotated_i_object[..., :3] |
|
|
|
h_prime, w_prime, channels = rotated_i_object.shape |
|
x1, y1, x2, y2 = i_rois[patch_idx] |
|
h, w = y2 - y1, x2 - x1 |
|
if ((h_prime - h) % 2) == 0: |
|
delta_h1 = delta_h2 = math.ceil((h_prime - h) / 2) |
|
else: |
|
delta_h1 = math.ceil((h_prime - h) / 2) |
|
delta_h2 = math.floor((h_prime - h) / 2) |
|
if ((w_prime - w) % 2) == 0: |
|
delta_w1 = delta_w2 = math.ceil((w_prime - w) / 2) |
|
else: |
|
delta_w1 = math.ceil((w_prime - w) / 2) |
|
delta_w2 = math.floor((w_prime - w) / 2) |
|
|
|
x1_new, y1_new, x2_new, y2_new = x1 - delta_w1, y1 - delta_h1, x2 + delta_w2, y2 + delta_h2 |
|
if all(i >= 0 for i in [x1_new, y1_new, x2_new, y2_new]) and all( |
|
i < width for i in [x1_new, y1_new, x2_new, y2_new]): |
|
|
|
i_patch = rotated_i_object |
|
i_alpha = rotated_i_alpha[..., np.newaxis] |
|
img[y1_new:y2_new, x1_new:x2_new, :] = img[y1_new:y2_new, x1_new:x2_new, :] * (1 - i_alpha) + i_patch * i_alpha |
|
else: |
|
|
|
img_H, img_W, C = img.shape |
|
patch_H, patch_W, _ = rotated_i_object.shape |
|
extended_img = np.zeros((img_H + 2 * patch_H, img_W + 2 * patch_W, C), dtype=img.dtype) |
|
extended_img[patch_H:(img_H + patch_H), patch_W:(img_W + patch_W), :] = img |
|
|
|
x1_new += patch_W |
|
x2_new += patch_W |
|
y1_new += patch_H |
|
y2_new += patch_H |
|
i_alpha = rotated_i_alpha[..., np.newaxis] |
|
extended_img[y1_new:y2_new, x1_new:x2_new, :] = extended_img[y1_new:y2_new, x1_new:x2_new, :] * (1 - i_alpha) + rotated_i_object * i_alpha |
|
img = extended_img[patch_H:(img_H + patch_H), patch_W:(img_W + patch_W), :] |
|
|
|
img = np.array(img) |
|
transformed_data_1.append(img) |
|
|
|
return transformed_data_1 |
|
|
|
|
|
@staticmethod |
|
def rectangle_movement(boxes: np.ndarray, |
|
img_wh: tuple, |
|
loc_velocity: float, |
|
size_velocity: float, |
|
num_frames: int, |
|
key_frame_probs: List[float]) -> np.ndarray: |
|
""" Simulate the object movement. |
|
|
|
Args: |
|
boxes (np.ndarray): in shpae of [N_boxes, 4] |
|
img_wh (tuple): image width and image height |
|
loc_velocity (float): max speed of the center point movement |
|
size_velocity (float): max speed of size changes |
|
num_frames (int): number of frames |
|
key_frame_probs (float): probability distribution of how many key |
|
frames will be sampled. |
|
|
|
Returns |
|
all_boxes (np.ndarray): the generated box trajectory, in shpae |
|
of [N_traj, N_frame, 4]. |
|
|
|
""" |
|
|
|
loc_key_inds = sample_key_frames(num_frames, key_frame_probs) |
|
|
|
ctr_pts = (boxes[:, 0:2] + boxes[:, 2:4]) * 0.5 |
|
|
|
box_sizes = (boxes[:, 2:4] - boxes[:, 0:2]) |
|
|
|
|
|
min_ctr_pts = box_sizes * 0.5 |
|
max_ctr_pts = np.array(img_wh[0:2]).reshape(1, 2) - box_sizes * 0.5 |
|
|
|
|
|
ctr_pts_list = [np.expand_dims(ctr_pts, axis=0)] |
|
|
|
for i in range(len(loc_key_inds) - 1): |
|
if loc_velocity > 0: |
|
index_diff = loc_key_inds[i + 1] - loc_key_inds[i] |
|
shifts = np.random.uniform(low=-loc_velocity * index_diff, |
|
high=loc_velocity * index_diff, |
|
size=ctr_pts.shape) |
|
|
|
ctr_pts = ctr_pts + shifts |
|
ctr_pts = np.clip(ctr_pts, min_ctr_pts, max_ctr_pts) |
|
ctr_pts_list.append(np.expand_dims(ctr_pts, axis=0)) |
|
ctr_pts = np.concatenate(ctr_pts_list, axis=0) |
|
|
|
ctr_pts = extend_key_frame_to_all(ctr_pts, loc_key_inds, 'random') |
|
|
|
|
|
|
|
size_key_inds = sample_key_frames(num_frames, key_frame_probs) |
|
|
|
|
|
box_sizes_list = [np.expand_dims(box_sizes, axis=0)] |
|
for i in range(len(size_key_inds) - 1): |
|
if size_velocity > 0: |
|
index_diff = size_key_inds[i + 1] - size_key_inds[i] |
|
scales = np.random.uniform(low=-size_velocity * index_diff, |
|
high=size_velocity * index_diff, |
|
size=box_sizes.shape) |
|
scales = np.exp(scales) |
|
box_sizes = box_sizes * scales |
|
box_sizes_list.append(np.expand_dims(box_sizes, axis=0)) |
|
box_sizes = np.concatenate(box_sizes_list, axis=0) |
|
|
|
box_sizes = extend_key_frame_to_all(box_sizes, size_key_inds, 'random') |
|
|
|
|
|
|
|
all_boxes = np.concatenate((ctr_pts - box_sizes * 0.5, |
|
ctr_pts + box_sizes * 0.5), axis=2) |
|
|
|
|
|
all_boxes = all_boxes.transpose((1, 0, 2)) |
|
return all_boxes |
|
|
|
    @staticmethod
    def gaussian_movement(box_shapes: np.ndarray,
                          img_wh: tuple,
                          num_trajs: int,
                          size_velocity: float,
                          num_frames: int,
                          key_frame_probs: List[float]) -> tuple:
        """ Simulate the object movement.

        Args:
            box_shapes (np.ndarray): sampled (w, h) box shapes, one row per
                trajectory.
            img_wh (tuple): image width and image height.
            num_trajs (int): number of trajectories to generate.
            size_velocity (float): max speed of size changes.
            num_frames (int): number of frames.
            key_frame_probs (List[float]): probability distribution of how
                many key frames will be sampled (used for size changes only).

        Returns
            all_boxes (np.ndarray): the generated box trajectory, in shpae
                of [N_traj, N_frame, 4].
            boxes (np.ndarray): the first-frame boxes, [N_traj, 4].

        """

        def create_traj(box_shapes):
            # Build one smooth 2-D center trajectory: Gaussian-filter random
            # points, normalize to [0, 1], then scale and translate the
            # curve into the image.
            w = img_wh[0]
            h = img_wh[1]

            n_points = 48
            sigma = 8

            x = np.random.uniform(1+box_shapes[0]/2,w-1-box_shapes[0]/2,n_points)
            y = np.random.uniform(1+box_shapes[1]/2,h-1-box_shapes[1]/2,n_points)

            # Smooth the random points into a continuous path.
            xk = gaussian_filter1d(x, sigma=sigma, mode='reflect')
            yk = gaussian_filter1d(y, sigma=sigma, mode='reflect')

            # Normalize each coordinate to [0, 1].
            xkk = (xk -xk.min())
            xkk /= xkk.max()
            ykk = (yk -yk.min())
            ykk /= ykk.max()

            # Random overall extent of the motion, in pixels.
            scaling_factor = np.random.randint(40,180)
            xkk*=scaling_factor
            ykk*=scaling_factor

            # NOTE(review): randint requires low < high, so this raises a
            # ValueError when scaling_factor >= w or >= h -- assumes frames
            # larger than ~180 px; confirm upstream input sizes.
            translation_factor_x = np.random.randint(0,w-scaling_factor)
            translation_factor_y = np.random.randint(0,h-scaling_factor)
            tr_x = xkk + translation_factor_x
            tr_y = ykk + translation_factor_y

            tr_x = np.clip(tr_x,0,w-1)
            tr_y = np.clip(tr_y,0,h-1)

            # NOTE(review): hard-coded 16 way-points -- presumably assumes
            # num_frames == 16; confirm against the clip length used here.
            idxs = np.round(np.linspace(0, tr_x.shape[0]-1, num=16)).astype(int)
            x_f = tr_x[idxs].astype(int)
            y_f = tr_y[idxs].astype(int)

            traj = np.column_stack((x_f,y_f))
            traj = np.expand_dims(traj, axis=1)  # -> [16, 1, 2]
            return traj

        # Stack per-trajectory centers: ctr_pts is [16, num_trajs, 2].
        ctr_pts_list = []
        for i in range(num_trajs):
            ctr_pts_list.append(create_traj(box_shapes[i]))
        ctr_pts = np.concatenate(ctr_pts_list, axis=1)

        # First-frame boxes built around the first trajectory point.
        boxes_list = []
        for i in range(num_trajs):
            x1, y1 = ctr_pts[0][i][0], ctr_pts[0][i][1]
            box = np.concatenate((
                (x1 - box_shapes[i, 0]/2).reshape(-1, 1),
                (y1 - box_shapes[i, 1]/2).reshape(-1, 1),
                (x1 + box_shapes[i, 0]/2).reshape(-1, 1),
                (y1 + box_shapes[i, 1]/2).reshape(-1, 1)),
                axis=1)
            boxes_list.append(box)

        boxes= np.concatenate(boxes_list, axis=0)
        box_sizes = (boxes[:, 2:4] - boxes[:, 0:2])

        # Random-walk the box sizes over key frames (same scheme as
        # rectangle_movement), then expand them to all frames.
        size_key_inds = sample_key_frames(num_frames, key_frame_probs)

        box_sizes_list = [np.expand_dims(box_sizes, axis=0)]
        for i in range(len(size_key_inds) - 1):
            if size_velocity > 0:
                index_diff = size_key_inds[i + 1] - size_key_inds[i]
                scales = np.random.uniform(low=-size_velocity * index_diff,
                                           high=size_velocity * index_diff,
                                           size=box_sizes.shape)
                scales = np.exp(scales)
                box_sizes = box_sizes * scales
            box_sizes_list.append(np.expand_dims(box_sizes, axis=0))
        box_sizes = np.concatenate(box_sizes_list, axis=0)

        box_sizes = extend_key_frame_to_all(box_sizes, size_key_inds, 'random')

        # Combine centers and sizes into (x1, y1, x2, y2) per frame.
        all_boxes = np.concatenate((ctr_pts - box_sizes * 0.5,
                                    ctr_pts + box_sizes * 0.5), axis=2)

        all_boxes = all_boxes.transpose((1, 0, 2))
        return all_boxes,boxes
|
|
|
def __call__(self,img_tuple): |
|
|
|
""" Generate the transformation parameters. |
|
|
|
Args: |
|
data (List[np.ndarray]): list of image array, each element is in |
|
a shape of [H, W, 3] |
|
|
|
Returns: |
|
params (dict): a dict that contains necessary transformation |
|
params, which include: |
|
'patches': list of image patches (np.ndarray) |
|
'alphas': list of alpha mask, same size and shape as patches. |
|
'traj_rois': the trajectory position, in shape of |
|
[N_traj, N_frame, 4] |
|
'traj_labels': whether the patches have been pasted on some |
|
specific frames, in shape of [N_traj, N_frame] |
|
""" |
|
|
|
|
|
|
|
img_group, label = img_tuple |
|
|
|
|
|
|
|
new_data = [np.array(img) for img in img_group] |
|
|
|
|
|
|
|
data_1 = new_data |
|
|
|
h, w = data_1[0].shape[0:2] |
|
|
|
|
|
|
|
if self.motion_type == 'linear' : |
|
|
|
boxes = self.region_sampler.sample(data_1) |
|
|
|
traj_rois = self.rectangle_movement(boxes, (w, h), |
|
self.loc_velocity, |
|
self.size_velocity, |
|
len(data_1), |
|
self.key_frame_probs) |
|
|
|
elif self.motion_type == 'gaussian' : |
|
|
|
box_shapes = self.region_sampler.sample_box_shapes(data_1) |
|
|
|
traj_rois,boxes = self.gaussian_movement(box_shapes, (w, h), |
|
self.region_sampler.num_rois, |
|
self.size_velocity, |
|
len(data_1), |
|
self.key_frame_probs) |
|
|
|
|
|
traj_rois = np.round(traj_rois).astype(int) |
|
|
|
|
|
|
|
|
|
if not self.use_objects: |
|
|
|
|
|
patches_list, alphas_list, label_list = self.paste_patches(data_1,traj_rois,boxes) |
|
else: |
|
|
|
patches_list, alphas_list, label_list = self.paste_objects(data_1,traj_rois,boxes) |
|
|
|
|
|
|
|
transforms_dict = dict( |
|
traj_rois=traj_rois, |
|
patches=patches_list, |
|
alphas=alphas_list, |
|
traj_labels=label_list, |
|
rot_velocity = self.rot_velocity, |
|
patch_transformation = self.patch_transformation, |
|
key_frame_probs = self.key_frame_probs |
|
) |
|
|
|
output_data = self._apply_image( new_data,transforms_dict) |
|
|
|
ret_data = [Image.fromarray(img) for img in output_data] |
|
|
|
return ret_data, label, traj_rois |
|
|