import os os.environ["CDF_LIB"] = "/is/cluster/scratch/stripathi/data/cdf37_1-dist/src/lib" import cv2 import pandas as pd import json import glob import h5py import torch import trimesh import numpy as np import pickle as pkl from xml.dom import minidom import xml.etree.ElementTree as ET from tqdm import tqdm from spacepy import pycdf # from .read_openpose import read_openpose import sys sys.path.append('../../') from models import hmr, SMPL import config import constants import shutil import smplx import pytorch3d.transforms as p3dt from utils.geometry import batch_rodrigues, batch_rot2aa, ea2rm model_type = 'smplx' model_folder = '/ps/project/common/smplifyx/models/' body_model_params = dict(model_path=model_folder, model_type=model_type, create_global_orient=True, create_body_pose=True, create_betas=True, num_betas=10, create_left_hand_pose=True, create_right_hand_pose=True, create_expression=True, create_jaw_pose=True, create_leye_pose=True, create_reye_pose=True, create_transl=True, use_pca=False) body_model = smplx.create(gender='neutral', **body_model_params).to('cuda') def rich_extract(img_dataset_path, out_path, split=None, vis_path=None, visualize=False, downsample_factor=4): # structs we use imgnames_ = [] poses_, shapes_, transls_ = [], [], [] cams_k_ = [] contact_label_ = [] scene_seg_, part_seg_ = [], [] for i, fl in tqdm(enumerate(sorted(os.listdir(os.path.join(img_dataset_path, 'images', split)))), dynamic_ncols=True): ind = fl.index('cam') location = fl[:ind-1] cam_num = fl[ind:ind+6] img = fl[ind+7:-3] + 'jpeg' imgname = os.path.join(location, cam_num, img) mask_name = fl sp = mask_name.split('_') indx = mask_name.index('cam') st = mask_name[indx-1:indx+7] mask_name = mask_name.replace(st, '/') mask_name = mask_name[:-7] new_p = mask_name.split('/') mask_name = new_p[0] + '/' + new_p[1] + '/' + sp[1] + '.pkl' mask_path = os.path.join(img_dataset_path, 'labels', split, mask_name) df = pd.read_pickle(mask_path) mask = df['contact'] scene_path = os.path.join(img_dataset_path, 'segmentation_masks', split, fl[:-3] + 'png') part_path = os.path.join(img_dataset_path, 'parts', split, fl[:-3] + 'png') dataset_path = '/ps/project/datasets/RICH' ind = fl.index('cam') frame_id = fl[:ind-1] location = frame_id.split('_')[0] if location == 'LectureHall': if 'chair' in frame_id: cam2world_location = location + '_' + 'chair' else: cam2world_location = location + '_' + 'yoga' else: cam2world_location = location img_num = fl.split('_')[-2] cam_num = int(fl.split('_')[-1][:2]) # get ioi2scan transformation per sequence ioi2scan_fn = os.path.join(dataset_path, 'website_release/multicam2world', cam2world_location + '_multicam2world.json') try: camera_fn = os.path.join(dataset_path, 'rich_toolkit/data/scan_calibration', location, f'calibration/{cam_num:03d}.xml') focal_length_x, focal_length_y, camC, camR, camT, _, _, _ = extract_cam_param_xml(camera_fn) except: print(f'camera calibration file not found: {camera_fn}') continue # path to smpl params smplx_param = os.path.join(dataset_path, 'rich_toolkit/data/bodies', split, frame_id, str(img_num), frame_id.split('_')[1] + '.pkl') # get smpl parameters ## body resides in multi-ioi coordidate, where camera 0 is world zero. 
        with open(smplx_param, 'rb') as f:
            body_params = pkl.load(f)  # in ioi coordinates: cam 0

        beta = body_params['betas']
        pose_aa = body_params['body_pose']
        pose_rotmat = p3dt.axis_angle_to_matrix(torch.FloatTensor(pose_aa.reshape(-1, 3))).numpy()
        transl = body_params['transl']
        global_orient = body_params['global_orient']
        global_orient = p3dt.axis_angle_to_matrix(torch.FloatTensor(global_orient.reshape(-1, 3))).numpy()

        # canonical body with shape (zero pose); vertices/joints are computed for
        # debugging and are not stored in the npz
        smpl_body_cam0 = body_model(betas=torch.FloatTensor(beta).to('cuda'))
        vertices_cam0 = smpl_body_cam0.vertices.detach().cpu().numpy().squeeze()
        joints_cam0 = smpl_body_cam0.joints.detach().cpu().numpy()
        pelvis_cam0 = joints_cam0[:, 0, :]

        # rigid transformation between multi-ioi and Leica scan (world)
        with open(ioi2scan_fn, 'r') as f:
            ioi2scan_dict = json.load(f)
        R_ioi2world = np.array(ioi2scan_dict['R'])  # Note: R is transposed
        t_ioi2world = np.array(ioi2scan_dict['t']).reshape(1, 3)

        # get SMPL-X params in camera coordinates: rotate global_orient into the camera frame
        global_orient_cam = np.matmul(np.array(camR), global_orient)
        full_pose_rotmat_cam = np.concatenate((global_orient_cam, pose_rotmat), axis=0).squeeze()
        theta_cam = batch_rot2aa(torch.FloatTensor(full_pose_rotmat_cam)).reshape(-1, 66).cpu().numpy()

        # build the intrinsic matrix, rescaled to the downsampled image resolution
        K = np.eye(3, dtype=np.float64)
        K[0, 0] = focal_length_x / downsample_factor
        K[1, 1] = focal_length_y / downsample_factor
        K[:2, 2:] = camC.T / downsample_factor

        # get camera parameters wrt the scan
        R_worldtocam = np.matmul(camR, R_ioi2world)  # Note: R_ioi2world is transposed
        T_worldtocam = -t_ioi2world + camT

        # store data
        imgnames_.append(os.path.join('/ps/project/datasets/RICH_JPG', split, imgname))
        contact_label_.append(mask)
        scene_seg_.append(scene_path)
        part_seg_.append(part_path)
        poses_.append(theta_cam.squeeze())
        transls_.append(transl.squeeze())
        shapes_.append(beta.squeeze())
        cams_k_.append(K.tolist())

    # store the data struct
    if not os.path.isdir(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path, f'rich_{split}_smplx.npz')
    np.savez(out_file,
             imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             contact_label=contact_label_,
             scene_seg=scene_seg_,
             part_seg=part_seg_)
    print('Saved to', out_file)
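
# Sanity-check sketch (illustrative, not part of the pipeline): the camera-frame
# conversion inside rich_extract composes camR with the global orientation in
# rotation-matrix form, which is the same operation rectify_pose() below performs
# on axis-angle input. The toy values are hypothetical, and the check assumes
# batch_rodrigues / batch_rot2aa map between (N, 3) axis-angle and (N, 3, 3)
# rotation matrices, as their use in this file suggests.
def _check_orient_rectification():
    cam_r = p3dt.axis_angle_to_matrix(torch.FloatTensor([[0.1, -0.2, 0.3]]))  # hypothetical camera rotation
    orient_aa = torch.FloatTensor([[0.5, 0.0, -0.4]])  # hypothetical global_orient
    # route 1: rotation-matrix composition, as done inside rich_extract
    route1 = torch.matmul(cam_r, p3dt.axis_angle_to_matrix(orient_aa))
    # route 2: rectify_pose() on the axis-angle vector, mapped back to a matrix
    route2 = p3dt.axis_angle_to_matrix(rectify_pose(cam_r, orient_aa))
    assert torch.allclose(route1, route2, atol=1e-5)
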
def rectify_pose(camera_r, body_aa):
    body_r = batch_rodrigues(body_aa).reshape(-1, 3, 3)
    final_r = camera_r @ body_r
    body_aa = batch_rot2aa(final_r)
    return body_aa


def extract_cam_param_xml(xml_path: str = '', dtype=float):
    tree = ET.parse(xml_path)
    extrinsics_mat = [float(s) for s in tree.find('./CameraMatrix/data').text.split()]
    intrinsics_mat = [float(s) for s in tree.find('./Intrinsics/data').text.split()]
    distortion_vec = [float(s) for s in tree.find('./Distortion/data').text.split()]

    focal_length_x = intrinsics_mat[0]
    focal_length_y = intrinsics_mat[4]
    center = np.array([[intrinsics_mat[2], intrinsics_mat[5]]], dtype=dtype)

    rotation = np.array([[extrinsics_mat[0], extrinsics_mat[1], extrinsics_mat[2]],
                         [extrinsics_mat[4], extrinsics_mat[5], extrinsics_mat[6]],
                         [extrinsics_mat[8], extrinsics_mat[9], extrinsics_mat[10]]], dtype=dtype)

    translation = np.array([[extrinsics_mat[3], extrinsics_mat[7], extrinsics_mat[11]]], dtype=dtype)

    # t = -Rc --> c = -R^T t
    cam_center = [-extrinsics_mat[0] * extrinsics_mat[3] - extrinsics_mat[4] * extrinsics_mat[7] -
                  extrinsics_mat[8] * extrinsics_mat[11],
                  -extrinsics_mat[1] * extrinsics_mat[3] - extrinsics_mat[5] * extrinsics_mat[7] -
                  extrinsics_mat[9] * extrinsics_mat[11],
                  -extrinsics_mat[2] * extrinsics_mat[3] - extrinsics_mat[6] * extrinsics_mat[7] -
                  extrinsics_mat[10] * extrinsics_mat[11]]
    cam_center = np.array([cam_center], dtype=dtype)

    k1 = np.array([distortion_vec[0]], dtype=dtype)
    k2 = np.array([distortion_vec[1]], dtype=dtype)

    return focal_length_x, focal_length_y, center, rotation, translation, cam_center, k1, k2


rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich',
             out_path='/is/cluster/work/achatterjee/rich/npzs',
             split='train')
rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich',
             out_path='/is/cluster/work/achatterjee/rich/npzs',
             split='val')
rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich',
             out_path='/is/cluster/work/achatterjee/rich/npzs',
             split='test')
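
# Usage sketch: inspect one of the archives written above. Field names match the
# np.savez call in rich_extract; allow_pickle=True guards against ragged
# per-frame contact arrays being stored as object arrays.
def _inspect_npz(npz_path):
    data = np.load(npz_path, allow_pickle=True)
    print({k: np.asarray(data[k]).shape for k in data.files})

# _inspect_npz('/is/cluster/work/achatterjee/rich/npzs/rich_train_smplx.npz')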