import numpy as np
import pickle as pkl
import torch
import trimesh
import cv2
import os
from tqdm import tqdm
import glob
import argparse
from preprocessing_utils import (smpl_to_pose, PerspectiveCamera, Renderer, render_trimesh,
                                 estimate_translation_cv2, transform_smpl)
from loss import joints_2d_loss, pose_temporal_loss, get_loss_weights
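
# The script runs one of three sequential stages selected by --mode:
#   'mask'   -- render ROMP's initial SMPL estimates into per-frame masks;
#   'refine' -- optimize the SMPL parameters against OpenPose 2D keypoints;
#   'final'  -- normalize the refined fits and export images, masks, poses,
#               and cameras for training.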


def main(args):
    device = torch.device("cuda:0")
    seq = args.seq
    gender = args.gender
    DIR = './raw_data'
    img_dir = f'{DIR}/{seq}/frames'
    romp_file_dir = f'{DIR}/{seq}/ROMP'
    img_paths = sorted(glob.glob(f"{img_dir}/*.png"))
    romp_file_paths = sorted(glob.glob(f"{romp_file_dir}/*.npz"))

    from smplx import SMPL
    smpl_model = SMPL('../code/lib/smpl/smpl_model', gender=gender).to(device)

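    # Build camera intrinsics for the chosen source. For 'custom' videos there
    # is no calibration file, so the focal length is approximated by the longer
    # image side and the principal point by the image center. NeuMan_DIR and
    # DeepCap_DIR are left empty here and must point to the local dataset roots.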
    input_img = cv2.imread(img_paths[0])
    if args.source == 'custom':
        focal_length = max(input_img.shape[0], input_img.shape[1])
        cam_intrinsics = np.array([[focal_length, 0., input_img.shape[1] // 2],
                                   [0., focal_length, input_img.shape[0] // 2],
                                   [0., 0., 1.]])
    elif args.source == 'neuman':
        NeuMan_DIR = ''
        with open(f'{NeuMan_DIR}/{seq}/sparse/cameras.txt') as f:
            lines = f.readlines()
        cam_params = lines[3].split()
        cam_intrinsics = np.array([[float(cam_params[4]), 0., float(cam_params[6])],
                                   [0., float(cam_params[5]), float(cam_params[7])],
                                   [0., 0., 1.]])
    elif args.source == 'deepcap':
        DeepCap_DIR = ''
        with open(f'{DeepCap_DIR}/monocularCalibrationBM.calibration') as f:
            lines = f.readlines()

        cam_params = lines[5].split()
        cam_intrinsics = np.array([[float(cam_params[1]), 0., float(cam_params[3])],
                                   [0., float(cam_params[6]), float(cam_params[7])],
                                   [0., 0., 1.]])
    else:
        print('Please specify the source of the dataset (custom, neuman, or deepcap); support for more sources will be added in the future.')
        raise NotImplementedError
    renderer = Renderer(img_size=[input_img.shape[0], input_img.shape[1]], cam_intrinsic=cam_intrinsics)

    if args.mode == 'mask':
        if not os.path.exists(f'{DIR}/{seq}/init_mask'):
            os.makedirs(f'{DIR}/{seq}/init_mask')
    elif args.mode == 'refine':
        if not os.path.exists(f'{DIR}/{seq}/init_refined_smpl'):
            os.makedirs(f'{DIR}/{seq}/init_refined_smpl')
        if not os.path.exists(f'{DIR}/{seq}/init_refined_mask'):
            os.makedirs(f'{DIR}/{seq}/init_refined_mask')
        if not os.path.exists(f'{DIR}/{seq}/init_refined_smpl_files'):
            os.makedirs(f'{DIR}/{seq}/init_refined_smpl_files')
        openpose_dir = f'{DIR}/{seq}/openpose'
        openpose_paths = sorted(glob.glob(f"{openpose_dir}/*.npy"))
        opt_num_iters = 150
        weight_dict = get_loss_weights()
        cam = PerspectiveCamera(focal_length_x=torch.tensor(cam_intrinsics[0, 0], dtype=torch.float32),
                                focal_length_y=torch.tensor(cam_intrinsics[1, 1], dtype=torch.float32),
                                center=torch.tensor(cam_intrinsics[0:2, 2]).unsqueeze(0)).to(device)
        mean_shape = []
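        # Map SMPL joints onto the 25-keypoint OpenPose layout ('coco25') so
        # the projected joints can be compared with the OpenPose detections.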
        smpl2op_mapping = torch.tensor(smpl_to_pose(model_type='smpl', use_hands=False, use_face=False,
                                                    use_face_contour=False, openpose_format='coco25'),
                                       dtype=torch.long).to(device)
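
    # The 'final' stage consumes the refined SMPL fits and masks written by
    # 'refine' and exports training-ready data under ../data/{seq}.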
    elif args.mode == 'final':
        refined_smpl_dir = f'{DIR}/{seq}/init_refined_smpl_files'
        refined_smpl_mask_dir = f'{DIR}/{seq}/init_refined_mask'
        refined_smpl_paths = sorted(glob.glob(f"{refined_smpl_dir}/*.pkl"))
        refined_smpl_mask_paths = sorted(glob.glob(f"{refined_smpl_mask_dir}/*.png"))

        save_dir = f'../data/{seq}'
        if not os.path.exists(os.path.join(save_dir, 'image')):
            os.makedirs(os.path.join(save_dir, 'image'))
        if not os.path.exists(os.path.join(save_dir, 'mask')):
            os.makedirs(os.path.join(save_dir, 'mask'))

        scale_factor = args.scale_factor
        smpl_shape = np.load(f'{DIR}/{seq}/mean_shape.npy')
        T_hip = smpl_model.get_T_hip(betas=torch.tensor(smpl_shape)[None].float().to(device)).squeeze().cpu().numpy()
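
        # The images are downscaled by scale_factor in this stage, so the focal
        # lengths and principal point are divided by the same factor. K is kept
        # as a 4x4 matrix so the full projection P = K @ target_extrinsic can
        # be formed directly per frame.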
        K = np.eye(4)
        K[:3, :3] = cam_intrinsics
        K[0, 0] = K[0, 0] / scale_factor
        K[1, 1] = K[1, 1] / scale_factor
        K[0, 2] = K[0, 2] / scale_factor
        K[1, 2] = K[1, 2] / scale_factor

        dial_kernel = np.ones((20, 20), np.uint8)

    output_trans = []
    output_pose = []
    output_P = {}

    last_j3d = None
    actor_id = 0
    cam_extrinsics = np.eye(4)
    R = torch.tensor(cam_extrinsics[:3, :3])[None].float()
    T = torch.tensor(cam_extrinsics[:3, 3])[None].float()
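
    # Per-frame processing. The extrinsics above stay at identity for every
    # frame: a static camera is assumed and all motion is carried by the SMPL
    # global orientation and translation.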
    for idx, img_path in enumerate(tqdm(img_paths)):
        input_img = cv2.imread(img_path)
        if args.mode == 'mask' or args.mode == 'refine':
            seq_file = np.load(romp_file_paths[idx], allow_pickle=True)['results'][()]

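            # Track the target actor when ROMP detects multiple people: pick
            # the detection whose mean 3D joints are closest to the previous
            # frame's tracked joints.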
            if len(seq_file['smpl_thetas']) >= 2:
                dist = []
                if idx == 0:
                    last_j3d = seq_file['joints'][actor_id]
                for i in range(len(seq_file['smpl_thetas'])):
                    dist.append(np.linalg.norm(seq_file['joints'][i].mean(0) - last_j3d.mean(0, keepdims=True)))
                actor_id = np.argmin(dist)
            smpl_verts = seq_file['verts'][actor_id]
            pj2d_org = seq_file['pj2d_org'][actor_id]
            joints3d = seq_file['joints'][actor_id]
            last_j3d = joints3d.copy()
            tra_pred = estimate_translation_cv2(joints3d, pj2d_org, proj_mat=cam_intrinsics)

            smpl_verts += tra_pred

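            # 'refine' jointly optimizes betas, pose, and translation so that
            # the projected SMPL joints match the OpenPose 2D detections, with
            # a temporal term tying each frame's pose to the previous frame.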
            if args.mode == 'refine':
                openpose = np.load(openpose_paths[idx])
                openpose_j2d = torch.tensor(openpose[:, :2][None], dtype=torch.float32, requires_grad=False, device=device)
                openpose_conf = torch.tensor(openpose[:, -1][None], dtype=torch.float32, requires_grad=False, device=device)

                smpl_shape = seq_file['smpl_betas'][actor_id][:10]
                smpl_pose = seq_file['smpl_thetas'][actor_id]
                smpl_trans = tra_pred

                opt_betas = torch.tensor(smpl_shape[None], dtype=torch.float32, requires_grad=True, device=device)
                opt_pose = torch.tensor(smpl_pose[None], dtype=torch.float32, requires_grad=True, device=device)
                opt_trans = torch.tensor(smpl_trans[None], dtype=torch.float32, requires_grad=True, device=device)

                opt_params = [{'params': opt_betas, 'lr': 1e-3},
                              {'params': opt_pose, 'lr': 1e-3},
                              {'params': opt_trans, 'lr': 1e-3}]
                optimizer = torch.optim.Adam(opt_params, lr=2e-3, betas=(0.9, 0.999))
                if idx == 0:
                    last_pose = [opt_pose.detach().clone()]
                loop = tqdm(range(opt_num_iters))
                for it in loop:
                    optimizer.zero_grad()

                    smpl_output = smpl_model(betas=opt_betas,
                                             body_pose=opt_pose[:, 3:],
                                             global_orient=opt_pose[:, :3],
                                             transl=opt_trans)
                    smpl_verts = smpl_output.vertices.data.cpu().numpy().squeeze()

                    smpl_joints_2d = cam(torch.index_select(smpl_output.joints, 1, smpl2op_mapping))

                    loss = dict()
                    loss['J2D_Loss'] = joints_2d_loss(openpose_j2d, smpl_joints_2d, openpose_conf)
                    loss['Temporal_Loss'] = pose_temporal_loss(last_pose[0], opt_pose)
                    w_loss = dict()
                    for k in loss:
                        w_loss[k] = weight_dict[k](loss[k], it)

                    tot_loss = torch.stack(list(w_loss.values())).sum()
                    tot_loss.backward()
                    optimizer.step()

                    l_str = 'Iter: %d' % it
                    for k in loss:
                        l_str += ', %s: %0.4f' % (k, weight_dict[k](loss[k], it).mean().item())
                    loop.set_description(l_str)
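
            # Render the (possibly refined) SMPL mesh. The crop below implies
            # the renderer outputs a square image of side max(H, W), so it is
            # center-cropped back to the input resolution; the alpha channel
            # then serves as the person mask.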
            smpl_mesh = trimesh.Trimesh(smpl_verts, smpl_model.faces, process=False)
            R = torch.tensor(cam_extrinsics[:3, :3])[None].float()
            T = torch.tensor(cam_extrinsics[:3, 3])[None].float()
            rendered_image = render_trimesh(renderer, smpl_mesh, R, T, 'n')
            if input_img.shape[0] < input_img.shape[1]:
                rendered_image = rendered_image[abs(input_img.shape[0] - input_img.shape[1]) // 2:(input_img.shape[0] + input_img.shape[1]) // 2, ...]
            else:
                rendered_image = rendered_image[:, abs(input_img.shape[0] - input_img.shape[1]) // 2:(input_img.shape[0] + input_img.shape[1]) // 2]
            valid_mask = (rendered_image[:, :, -1] > 0)[:, :, np.newaxis]

            if args.mode == 'mask':
                cv2.imwrite(os.path.join(f'{DIR}/{seq}/init_mask', '%04d.png' % idx), valid_mask * 255)
            elif args.mode == 'refine':
                output_img = (rendered_image[:, :, :-1] * valid_mask + input_img * (1 - valid_mask)).astype(np.uint8)
                cv2.imwrite(os.path.join(f'{DIR}/{seq}/init_refined_smpl', '%04d.png' % idx), output_img)
                cv2.imwrite(os.path.join(f'{DIR}/{seq}/init_refined_mask', '%04d.png' % idx), valid_mask * 255)
                last_pose.pop(0)
                last_pose.append(opt_pose.detach().clone())
                smpl_dict = {}
                smpl_dict['pose'] = opt_pose.data.squeeze().cpu().numpy()
                smpl_dict['trans'] = opt_trans.data.squeeze().cpu().numpy()
                smpl_dict['shape'] = opt_betas.data.squeeze().cpu().numpy()

                mean_shape.append(smpl_dict['shape'])
                pkl.dump(smpl_dict, open(os.path.join(f'{DIR}/{seq}/init_refined_smpl_files', '%04d.pkl' % idx), 'wb'))
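
        # 'final' stage: downscale images and masks, re-express the refined
        # SMPL fit in a fixed target camera, and center the body at the origin.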
        elif args.mode == 'final':
            input_img = cv2.resize(input_img, (input_img.shape[1] // scale_factor, input_img.shape[0] // scale_factor))
            seq_file = pkl.load(open(refined_smpl_paths[idx], 'rb'))

            mask = cv2.imread(refined_smpl_mask_paths[idx])
            mask = cv2.resize(mask, (mask.shape[1] // scale_factor, mask.shape[0] // scale_factor))

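            # Dilate the rendered SMPL mask (20x20 kernel) so it generously
            # covers the person even where the SMPL fit is imperfect.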
            mask = cv2.dilate(mask, dial_kernel)

            cv2.imwrite(os.path.join(save_dir, 'image/%04d.png' % idx), input_img)
            cv2.imwrite(os.path.join(save_dir, 'mask/%04d.png' % idx), mask)

            smpl_pose = seq_file['pose']
            smpl_trans = seq_file['trans']

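            # Negating the y and z rows of the identity extrinsic flips those
            # camera axes (presumably a coordinate-convention change) before
            # transform_smpl re-expresses the pose and translation in this frame.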
            target_extrinsic = np.eye(4)
            target_extrinsic[1:3] *= -1
            target_extrinsic, smpl_pose, smpl_trans = transform_smpl(cam_extrinsics, target_extrinsic, smpl_pose, smpl_trans, T_hip)
            smpl_output = smpl_model(betas=torch.tensor(smpl_shape)[None].float().to(device),
                                     body_pose=torch.tensor(smpl_pose[3:])[None].float().to(device),
                                     global_orient=torch.tensor(smpl_pose[:3])[None].float().to(device),
                                     transl=torch.tensor(smpl_trans)[None].float().to(device))
            smpl_verts = smpl_output.vertices.data.cpu().numpy().squeeze()
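
            # Center the body at the origin: shift by the negated midpoint of
            # the vertex bounding box, and compensate the camera translation so
            # the 2D projection is unchanged.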
            v_max = smpl_verts.max(axis=0)
            v_min = smpl_verts.min(axis=0)
            normalize_shift = -(v_max + v_min) / 2.

            trans = smpl_trans + normalize_shift

            target_extrinsic[:3, -1] = target_extrinsic[:3, -1] - (target_extrinsic[:3, :3] @ normalize_shift)

            P = K @ target_extrinsic
            output_trans.append(trans)
            output_pose.append(smpl_pose)
            output_P[f"cam_{idx}"] = P
    if args.mode == 'refine':
        mean_shape = np.array(mean_shape)
        np.save(f'{DIR}/{seq}/mean_shape.npy', mean_shape.mean(0))
    if args.mode == 'final':
        np.save(os.path.join(save_dir, 'poses.npy'), np.array(output_pose))
        np.save(os.path.join(save_dir, 'mean_shape.npy'), smpl_shape)
        np.save(os.path.join(save_dir, 'normalize_trans.npy'), np.array(output_trans))
        np.savez(os.path.join(save_dir, "cameras.npz"), **output_P)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Preprocessing data")
    parser.add_argument('--source', type=str, default='custom', help="data source: custom, neuman, or deepcap")
    parser.add_argument('--seq', type=str, help="name of the sequence folder under ./raw_data")
    parser.add_argument('--gender', type=str, help="gender of the actor: MALE or FEMALE")
    parser.add_argument('--mode', type=str, help="processing stage: mask, refine, or final")
    parser.add_argument('--scale_factor', type=int, default=2, help="downscale factor for the exported images (used in final mode)")
    args = parser.parse_args()
    main(args)
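
# Example invocation for a custom video (hypothetical sequence name, and
# assuming this file is saved as preprocessing.py); the three stages run in
# order, with ROMP results and OpenPose keypoints extracted beforehand:
#   python preprocessing.py --source custom --seq my_video --gender MALE --mode mask
#   python preprocessing.py --source custom --seq my_video --gender MALE --mode refine
#   python preprocessing.py --source custom --seq my_video --gender MALE --mode final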