import json
import os

import numpy as np
import torch
from imagebind import data
from imagebind.models.imagebind_model import ModalityType
from pytorch3d.transforms import axis_angle_to_matrix

from mogen.datasets.human_body_prior.body_model.body_model import BodyModel
from mogen.datasets.quaternion import qinv, qrot


def recover_root_rot_pos(data):
    """Recover the root joint's Y-axis rotation (as quaternions) and its
    global trajectory from the first four feature channels
    (rotation velocity, XZ linear velocity, root height)."""
    rot_vel = data[..., 0]
    r_rot_ang = torch.zeros_like(rot_vel).to(data.device)
    # Get the Y-axis rotation angle by integrating the rotation velocity.
    r_rot_ang[..., 1:] = rot_vel[..., :-1]
    r_rot_ang = torch.cumsum(r_rot_ang, dim=-1)

    r_rot_quat = torch.zeros(data.shape[:-1] + (4, )).to(data.device)
    r_rot_quat[..., 0] = torch.cos(r_rot_ang)
    r_rot_quat[..., 2] = torch.sin(r_rot_ang)

    r_pos = torch.zeros(data.shape[:-1] + (3, )).to(data.device)
    r_pos[..., 1:, [0, 2]] = data[..., :-1, 1:3]
    # Rotate the per-frame XZ velocity into the world frame, then integrate.
    r_pos = qrot(qinv(r_rot_quat), r_pos)
    r_pos = torch.cumsum(r_pos, dim=-2)
    r_pos[..., 1] = data[..., 3]
    return r_rot_quat, r_pos


def recover_from_ric(data, joints_num):
    """Recover global joint positions from rotation-invariant coordinates
    (RIC)."""
    r_rot_quat, r_pos = recover_root_rot_pos(data)
    positions = data[..., 4:(joints_num - 1) * 3 + 4]
    positions = positions.view(positions.shape[:-1] + (-1, 3))

    # Apply the root's Y-axis rotation to the local joint positions.
    rot = qinv(r_rot_quat[..., None, :]).expand(positions.shape[:-1] + (4, ))
    positions = qrot(rot, positions)

    # Add the root's XZ trajectory to all joints.
    positions[..., 0] += r_pos[..., 0:1]
    positions[..., 2] += r_pos[..., 2:3]

    # Concatenate the root joint with the remaining joints.
    positions = torch.cat([r_pos.unsqueeze(-2), positions], dim=-2)
    return positions


def create_data_item(meta_data,
                     root_dir,
                     basename,
                     tomato_repr=None,
                     keypoints3d=None,
                     expression=None,
                     smpl_rot=None,
                     bvh_rot=None):
    """Save one motion sample as a compressed .npz under `motions/` and its
    metadata as JSON under `metas/`."""
    assert os.path.exists(root_dir)
    meta_dir = os.path.join(root_dir, 'metas')
    motion_dir = os.path.join(root_dir, 'motions')
    os.makedirs(meta_dir, exist_ok=True)
    os.makedirs(motion_dir, exist_ok=True)
    motion_data = {}
    if tomato_repr is not None:
        motion_data['tomato_repr'] = tomato_repr
    if keypoints3d is not None:
        # Flatten per-frame keypoints to (num_frames, num_joints * 3)
        # before saving.
        num_frames = keypoints3d.shape[0]
        keypoints3d = keypoints3d.reshape((num_frames, -1))
        motion_data['keypoints3d'] = keypoints3d
    if expression is not None:
        motion_data['expression'] = expression
    if smpl_rot is not None:
        motion_data['smpl_rot'] = smpl_rot
    if bvh_rot is not None:
        motion_data['bvh_rot'] = bvh_rot
    motion_path = os.path.join(motion_dir, basename + '.npz')
    meta_path = os.path.join(meta_dir, basename + '.json')
    np.savez_compressed(motion_path, **motion_data)
    with open(meta_path, 'w') as f:
        json.dump(meta_data, f, indent=4)


def extract_text_feature(model, text, device):
    """Encode a list of strings with the ImageBind model, returning
    word-level and sequence-level features."""
    inputs = {
        ModalityType.TEXT: data.load_and_transform_text(text, device),
    }
    with torch.no_grad():
        text_word_feat, text_seq_feat = model(inputs)
    return text_word_feat, text_seq_feat


def extract_image_feature(model, image_paths, device):
    """Encode a list of image files, returning sequence-level embeddings."""
    inputs = {
        ModalityType.VISION:
        data.load_and_transform_vision_data(image_paths, device),
    }
    with torch.no_grad():
        _, embeddings = model(inputs)
    return embeddings


def extract_audio_feature(model, audio_paths, device):
    """Encode a list of audio files, returning word-level and
    sequence-level features."""
    inputs = {
        ModalityType.AUDIO:
        data.load_and_transform_audio_data(audio_paths,
                                           device,
                                           clips_per_video=1),
    }
    with torch.no_grad():
        audio_word_feat, audio_seq_feat = model(inputs)
    return audio_word_feat, audio_seq_feat
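# Usage sketch (illustrative only, not part of the conversion pipeline):
# decoding joint positions with `recover_from_ric`. The 263-channel
# HumanML3D-style layout (4 root + 63 RIC + 126 rotation + 66 velocity +
# 4 contact channels for 22 joints) is an assumption for demonstration.
def _demo_recover_from_ric():
    joints_num = 22
    feats = torch.zeros(1, 60, 263)  # (batch, frames, channels) dummy input
    joints = recover_from_ric(feats, joints_num)
    # The root joint is prepended, giving global positions for all joints.
    assert joints.shape == (1, 60, 22, 3)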
def copy_repr_data(src_data, src_idx, num_src_joints,
                   tgt_data, tgt_idx, num_tgt_joints):
    """Copy one joint's channels from a source representation into a target
    representation that may have a different number of joints."""
    # ric data
    tgt_base1 = 4 + (tgt_idx - 1) * 3
    src_base1 = 4 + (src_idx - 1) * 3
    tgt_data[:, tgt_base1: tgt_base1 + 3] = \
        src_data[:, src_base1: src_base1 + 3]
    # rot data
    tgt_base2 = 4 + (num_tgt_joints - 1) * 3 + (tgt_idx - 1) * 6
    src_base2 = 4 + (num_src_joints - 1) * 3 + (src_idx - 1) * 6
    tgt_data[:, tgt_base2: tgt_base2 + 6] = \
        src_data[:, src_base2: src_base2 + 6]
    # local velocity
    tgt_base3 = 4 + (num_tgt_joints - 1) * 9 + tgt_idx * 3
    src_base3 = 4 + (num_src_joints - 1) * 9 + src_idx * 3
    tgt_data[:, tgt_base3: tgt_base3 + 3] = \
        src_data[:, src_base3: src_base3 + 3]


def extract_repr_data(data, idx, num_joints):
    """Gather one joint's 12 channels (3 RIC + 6 rotation + 3 velocity)
    into a single array."""
    assert idx > 0
    # ric data
    base1 = 4 + (idx - 1) * 3
    ric_data = data[:, base1: base1 + 3]
    # rot data
    base2 = 4 + (num_joints - 1) * 3 + (idx - 1) * 6
    rot_data = data[:, base2: base2 + 6]
    # local velocity
    base3 = 4 + (num_joints - 1) * 9 + idx * 3
    local_vel = data[:, base3: base3 + 3]
    if isinstance(data, torch.Tensor):
        output = torch.cat((ric_data, rot_data, local_vel), dim=-1)
    else:
        output = np.concatenate((ric_data, rot_data, local_vel), axis=-1)
    return output


def move_repr_data(data, idx, num_joints, output):
    """Scatter a 12-channel joint block (see `extract_repr_data`) into
    joint `idx` of `output`."""
    assert idx > 0
    assert data.shape[1] == 12
    # ric data
    base1 = 4 + (idx - 1) * 3
    output[:, base1: base1 + 3] = data[:, :3]
    # rot data
    base2 = 4 + (num_joints - 1) * 3 + (idx - 1) * 6
    output[:, base2: base2 + 6] = data[:, 3: 9]
    # local velocity
    base3 = 4 + (num_joints - 1) * 9 + idx * 3
    output[:, base3: base3 + 3] = data[:, 9:]


def estimate_repr_data(data, idx1, idx2, tgt, ratio, num_joints):
    """Fill in joint `tgt` from joints `idx1` and `idx2`: the rotation is
    copied from `idx1`, while position and velocity are linearly
    interpolated with |idx1 - tgt| / |idx1 - idx2| = ratio."""
    assert 0 <= ratio <= 1, "ratio should be between 0 and 1"
    assert 1 <= idx1 <= num_joints, "idx1 out of range"
    assert 1 <= idx2 <= num_joints, "idx2 out of range"
    assert 1 <= tgt <= num_joints, "tgt out of range"
    # ric data
    base1 = 4 + (idx1 - 1) * 3
    base2 = 4 + (idx2 - 1) * 3
    baset = 4 + (tgt - 1) * 3
    pose1 = data[:, base1: base1 + 3]
    pose2 = data[:, base2: base2 + 3]
    poset = pose1 * (1 - ratio) + pose2 * ratio
    data[:, baset: baset + 3] = poset
    # rot data (direction: same as idx1)
    base1 = 4 + (num_joints - 1) * 3 + (idx1 - 1) * 6
    baset = 4 + (num_joints - 1) * 3 + (tgt - 1) * 6
    data[:, baset: baset + 6] = data[:, base1: base1 + 6]
    # local velocity
    base1 = 4 + (num_joints - 1) * 9 + idx1 * 3
    base2 = 4 + (num_joints - 1) * 9 + idx2 * 3
    baset = 4 + (num_joints - 1) * 9 + tgt * 3
    vel1 = data[:, base1: base1 + 3]
    vel2 = data[:, base2: base2 + 3]
    velt = vel1 * (1 - ratio) + vel2 * ratio
    data[:, baset: baset + 3] = velt
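# Usage sketch (illustrative only, not part of the pipeline): moving one
# joint's 12-channel block between skeletons of different sizes. The joint
# counts, random data, and the choice of joint 5 are assumptions for
# demonstration; trailing channels such as foot contacts are omitted here.
def _demo_repr_block_roundtrip():
    num_src_joints, num_tgt_joints = 52, 22
    src_dim = 4 + (num_src_joints - 1) * 9 + num_src_joints * 3
    tgt_dim = 4 + (num_tgt_joints - 1) * 9 + num_tgt_joints * 3
    src = np.random.randn(10, src_dim).astype(np.float32)
    tgt = np.zeros((10, tgt_dim), dtype=np.float32)
    # Pull joint 5 out of the source skeleton ...
    block = extract_repr_data(src, 5, num_src_joints)
    assert block.shape == (10, 12)
    # ... and write it into joint 5 of the smaller target skeleton.
    move_repr_data(block, 5, num_tgt_joints, tgt)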
class BodyModelWrapper:
    """Convenience wrapper holding male, female, and neutral SMPL-H body
    models and running the forward pass on AMASS-style sequences."""

    def __init__(self, device):
        file_path = os.path.abspath(os.path.dirname(__file__))
        body_model_dir = os.path.join(file_path,
                                      '../../data/motionverse/body_models')
        male_bm_path = os.path.join(body_model_dir, 'smplh/male/model.npz')
        male_dmpl_path = os.path.join(body_model_dir, 'dmpls/male/model.npz')
        female_bm_path = os.path.join(body_model_dir,
                                      'smplh/female/model.npz')
        female_dmpl_path = os.path.join(body_model_dir,
                                        'dmpls/female/model.npz')
        neutral_bm_path = os.path.join(body_model_dir,
                                       'smplh/neutral/model.npz')
        neutral_dmpl_path = os.path.join(body_model_dir,
                                         'dmpls/neutral/model.npz')
        self.num_betas = 10  # number of body shape parameters
        self.num_dmpls = 8  # number of DMPL soft-tissue parameters
        self.male_bm = BodyModel(bm_fname=male_bm_path,
                                 num_betas=self.num_betas,
                                 num_dmpls=self.num_dmpls,
                                 dmpl_fname=male_dmpl_path).to(device)
        self.female_bm = BodyModel(bm_fname=female_bm_path,
                                   num_betas=self.num_betas,
                                   num_dmpls=self.num_dmpls,
                                   dmpl_fname=female_dmpl_path).to(device)
        self.neutral_bm = BodyModel(bm_fname=neutral_bm_path,
                                    num_betas=self.num_betas,
                                    num_dmpls=self.num_dmpls,
                                    dmpl_fname=neutral_dmpl_path).to(device)
        self.device = device

    def process_smplh(self, smplh_data, downsample=1):
        """Run the SMPL-H forward pass on a dict with 'poses', 'trans',
        'betas', and optionally 'gender', returning joint positions as a
        numpy array of shape (num_frames, num_joints, 3)."""
        poses = smplh_data['poses'][::downsample]
        trans = smplh_data['trans'][::downsample]
        betas = smplh_data['betas']
        if len(betas.shape) == 1:
            # A single shape vector: crop and repeat it for every frame.
            betas = betas[:self.num_betas][np.newaxis]
            betas = np.repeat(betas, repeats=len(trans), axis=0)
        else:
            betas = betas[:, :self.num_betas]
        body_parms = {
            'root_orient': torch.Tensor(poses[:, :3]).to(self.device),
            'pose_body': torch.Tensor(poses[:, 3:66]).to(self.device),
            'pose_hand': torch.Tensor(poses[:, 66:]).to(self.device),
            'trans': torch.Tensor(trans).to(self.device),
            'betas': torch.Tensor(betas).to(self.device),
        }
        gender = smplh_data.get('gender', 'neutral')
        if gender in ('male', 'm'):
            bm = self.male_bm
        elif gender in ('female', 'f'):
            bm = self.female_bm
        else:
            bm = self.neutral_bm
        with torch.no_grad():
            body = bm(**body_parms)
        pose_seq_np = body.Jtr.detach().cpu().numpy()
        return pose_seq_np


def ang2joint(p3d0,
              pose,
              parent={
                  0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4,
                  8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9,
                  15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18,
                  21: 19, 22: 20, 23: 21
              }):
    """Forward kinematics over an SMPL-style kinematic tree.

    :param p3d0: [batch_size, joint_num, 3] rest-pose joint positions.
    :param pose: [batch_size, joint_num, 3] per-joint axis-angle rotations.
    :param parent: map from joint index to parent index (-1 for the root);
        defaults to the SMPL 24-joint tree.
    :return: [batch_size, joint_num, 3] posed joint positions.
    """

    def with_zeros(x):
        """Append a [0, 0, 0, 1] row to a batch of [3, 4] matrices,
        producing [batch, 4, 4] homogeneous transforms."""
        ones = torch.tensor(
            [[[0.0, 0.0, 0.0, 1.0]]],
            dtype=torch.float
        ).expand(x.shape[0], -1, -1).to(x.device)
        ret = torch.cat((x, ones), dim=1)
        return ret

    batch_num = p3d0.shape[0]
    jnum = len(parent.keys())
    J = p3d0
    R_cube_big = axis_angle_to_matrix(
        pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3)
    # Root transform: root rotation plus rest-pose root position.
    results = [
        with_zeros(
            torch.cat((R_cube_big[:, 0],
                       torch.reshape(J[:, 0, :], (-1, 3, 1))),
                      dim=2))
    ]
    # Chain each joint's local transform onto its parent's transform.
    for i in range(1, jnum):
        results.append(
            torch.matmul(
                results[parent[i]],
                with_zeros(
                    torch.cat(
                        (R_cube_big[:, i],
                         torch.reshape(J[:, i, :] - J[:, parent[i], :],
                                       (-1, 3, 1))),
                        dim=2))))
    stacked = torch.stack(results, dim=1)
    # The translation column of each homogeneous transform is the joint's
    # posed position.
    J_transformed = stacked[:, :, :3, 3]
    return J_transformed
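# Sanity-check sketch for `ang2joint` (illustrative only, not part of the
# pipeline): an all-zero axis-angle pose gives identity rotations at every
# joint, so forward kinematics should reproduce the rest pose exactly.
def _demo_ang2joint_identity():
    p3d0 = torch.randn(2, 24, 3)  # rest-pose joints for two bodies
    pose = torch.zeros(2, 24, 3)  # zero rotation at all 24 joints
    joints = ang2joint(p3d0, pose)
    assert torch.allclose(joints, p3d0, atol=1e-5)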