import random

import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence


def downsample(data_numpy, step, random_sample=True):
    # input: C,T,V,M
    begin = np.random.randint(step) if random_sample else 0
    return data_numpy[:, begin::step, :, :]


def temporal_slice(data_numpy, step):
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    return (
        data_numpy.reshape(C, T // step, step, V, M)
        .transpose((0, 1, 3, 2, 4))
        .reshape(C, T // step, V, step * M)
    )


def mean_subtractor(data_numpy, mean):
    # input: C,T,V,M
    # naive version
    if mean == 0:
        return data_numpy
    C, T, V, M = data_numpy.shape
    valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    begin = valid_frame.argmax()
    end = len(valid_frame) - valid_frame[::-1].argmax()
    data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean
    return data_numpy


def auto_pading(data_numpy, size, random_pad=False):
    # Zero-pad the sequence along the temporal axis up to `size` frames.
    C, T, V, M = data_numpy.shape
    if T < size:
        begin = random.randint(0, size - T) if random_pad else 0
        data_numpy_paded = np.zeros((C, size, V, M))
        data_numpy_paded[:, begin : begin + T, :, :] = data_numpy
        return data_numpy_paded
    else:
        return data_numpy


def random_choose(data_numpy, size, auto_pad=True):
    # input: C,T,V,M
    # Randomly pick a temporal segment of length `size`. Not entirely
    # reasonable, because zero-padded frames may be selected.
    C, T, V, M = data_numpy.shape
    if T == size:
        return data_numpy
    elif T < size:
        if auto_pad:
            return auto_pading(data_numpy, size, random_pad=True)
        else:
            return data_numpy
    else:
        begin = random.randint(0, T - size)
        return data_numpy[:, begin : begin + size, :, :]


def random_move(
    data_numpy,
    angle_candidate=[-10.0, -5.0, 0.0, 5.0, 10.0],
    scale_candidate=[0.9, 1.0, 1.1],
    transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2],
    move_time_candidate=[1],
):
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)

    A = np.random.choice(angle_candidate, num_node)
    S = np.random.choice(scale_candidate, num_node)
    T_x = np.random.choice(transform_candidate, num_node)
    T_y = np.random.choice(transform_candidate, num_node)

    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)

    # linearly interpolate angle, scale and translation between the key frames
    for i in range(num_node - 1):
        a[node[i] : node[i + 1]] = (
            np.linspace(A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
        )
        s[node[i] : node[i + 1]] = np.linspace(S[i], S[i + 1], node[i + 1] - node[i])
        t_x[node[i] : node[i + 1]] = np.linspace(
            T_x[i], T_x[i + 1], node[i + 1] - node[i]
        )
        t_y[node[i] : node[i + 1]] = np.linspace(
            T_y[i], T_y[i + 1], node[i + 1] - node[i]
        )

    theta = np.array(
        [[np.cos(a) * s, -np.sin(a) * s], [np.sin(a) * s, np.cos(a) * s]]
    )  # rotation matrix

    # perform transformation
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]  # translation
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)

    return data_numpy


def random_shift(data_numpy):
    # input: C,T,V,M
    # Shift the valid (non-zero) segment to a random temporal position.
    C, T, V, M = data_numpy.shape
    data_shift = np.zeros(data_numpy.shape)
    valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    begin = valid_frame.argmax()
    end = len(valid_frame) - valid_frame[::-1].argmax()

    size = end - begin
    bias = random.randint(0, T - size)
    data_shift[:, bias : bias + size, :, :] = data_numpy[:, begin:end, :, :]

    return data_shift
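# Illustrative sketch (not part of the original pipeline): how the augmentation
# helpers above can be chained on a single skeleton clip. The clip shape
# (C=3, T=100, V=25, M=2) is an assumed example, not a requirement of this module.
def _demo_augmentations():
    data = np.random.randn(3, 100, 25, 2)  # C,T,V,M
    data = random_shift(data)              # move the valid segment in time
    data = random_choose(data, size=64)    # random temporal crop (or pad) to 64 frames
    data = random_move(data)               # random rotation / scale / translation
    data = downsample(data, step=2)        # temporal subsampling
    return data                            # shape: (3, 32, 25, 2)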
def openpose_match(data_numpy):
    C, T, V, M = data_numpy.shape
    assert C == 3
    score = data_numpy[2, :, :, :].sum(axis=1)
    # the rank of body confidence in each frame (shape: T-1, M)
    rank = (-score[0 : T - 1]).argsort(axis=1).reshape(T - 1, M)

    # data of frame 1
    xy1 = data_numpy[0:2, 0 : T - 1, :, :].reshape(2, T - 1, V, M, 1)
    # data of frame 2
    xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M)
    # square of distance between frame 1&2 (shape: T-1, M, M)
    distance = ((xy2 - xy1) ** 2).sum(axis=2).sum(axis=0)

    # match pose
    forward_map = np.zeros((T, M), dtype=int) - 1
    forward_map[0] = range(M)
    for m in range(M):
        choose = rank == m
        forward = distance[choose].argmin(axis=1)
        for t in range(T - 1):
            distance[t, :, forward[t]] = np.inf
        forward_map[1:][choose] = forward
    assert np.all(forward_map >= 0)

    # chain the per-frame matches so every index refers back to frame 0
    for t in range(T - 1):
        forward_map[t + 1] = forward_map[t + 1][forward_map[t]]

    # generate data
    new_data_numpy = np.zeros(data_numpy.shape)
    for t in range(T):
        new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[t]].transpose(
            1, 2, 0
        )
    data_numpy = new_data_numpy

    # sort bodies by total confidence score
    trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0)
    rank = (-trace_score).argsort()
    data_numpy = data_numpy[:, :, :, rank]

    return data_numpy


def pad(tensor, padding_value=0):
    # Pad a list of variable-length tensors into one batch-first tensor.
    return pad_sequence(tensor, batch_first=True, padding_value=padding_value)


def collate_with_padding(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[1]) for item in batch]
    gt = [torch.tensor(item[2]) for item in batch]
    mask = [torch.tensor(item[3]) for item in batch]

    data = pad(data).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, target, gt, mask]


def collate_with_padding_multi(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[1]) for item in batch]
    gt = [torch.tensor(item[2]) for item in batch]
    mask = [torch.tensor(item[3]) for item in batch]

    data = pad(data).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, target, gt, mask]


def collate_with_padding_multi_velo(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    velo = [torch.tensor(item[1].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[2]) for item in batch]
    gt = [torch.tensor(item[3]) for item in batch]
    mask = [torch.tensor(item[4]) for item in batch]

    data = pad(data).transpose(1, 2)
    velo = pad(velo).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, velo, target, gt, mask]


def collate_with_padding_multi_joint(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]  # C,T,V,M -> T,C,V,M
    target = [torch.tensor(item[1]) for item in batch]  # video-level label
    gt = [torch.tensor(item[2]) for item in batch]  # frame-level labels
    mask = [torch.tensor(item[3]) for item in batch]
    index = [torch.tensor(item[4]) for item in batch]
    soft_label = [torch.tensor(item[5]) for item in batch]

    data = pad(data).transpose(1, 2)  # pad the joint sequences with zeros, not with the last frame
    target = torch.stack(target)
    # Pad the frame-level labels with 4, so the padding value acts as the
    # 'background' class; with 4 action classes the labels become 0, 1, 2, 3, 4.
    gt = pad(gt, padding_value=4)
    mask = pad(mask)
    index = torch.stack(index)
    # -100 matches PyTorch's default ignore_index for cross-entropy losses.
    soft_label = pad(soft_label, padding_value=-100)
    return [data, target, gt, mask, index, soft_label]
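# Illustrative sketch (assumed data layout, not defined by this module): collate a
# hand-built batch of two variable-length samples, each with a skeleton of shape
# (C=3, T, V=25, M=2), a video-level label, frame-level labels and a frame mask.
if __name__ == "__main__":
    fake_batch = []
    for t in (40, 64):
        skel = np.random.randn(3, t, 25, 2).astype(np.float32)  # C,T,V,M
        label = np.int64(1)                                      # video-level label
        frame_gt = np.random.randint(0, 4, size=t)               # frame-level labels
        frame_mask = np.ones(t, dtype=np.float32)                # valid-frame mask
        fake_batch.append((skel, label, frame_gt, frame_mask))

    data, target, gt, mask = collate_with_padding_multi(fake_batch)
    print(data.shape, target.shape, gt.shape, mask.shape)
    # expected: (2, 3, 64, 25, 2), (2,), (2, 64), (2, 64)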