import torch
import torch.nn.functional as F
import cv2 as cv
import numpy as np
import os
from glob import glob
from icecream import ic
from scipy.spatial.transform import Rotation as Rot
from scipy.spatial.transform import Slerp


# This function is borrowed from IDR: https://github.com/lioryariv/idr
def load_K_Rt_from_P(filename, P=None):
    if P is None:
        lines = open(filename).read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]  # homogeneous camera center

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()        # camera-to-world rotation
    pose[:3, 3] = (t[:3] / t[3])[:, 0]  # camera center in world coordinates

    return intrinsics, pose
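

# A minimal round-trip sketch (not part of the original pipeline; `_check_P_roundtrip`
# is a hypothetical helper added for illustration): compose P = K [R | -R C] for a
# known camera and confirm load_K_Rt_from_P recovers the intrinsics, the
# camera-to-world rotation R^T, and the camera center C.
def _check_P_roundtrip():
    K = np.array([[500., 0., 256.],
                  [0., 500., 256.],
                  [0., 0., 1.]], dtype=np.float32)
    R = Rot.from_euler('xyz', [10., 20., 30.], degrees=True).as_matrix().astype(np.float32)
    C = np.array([0.1, -0.2, 2.0], dtype=np.float32)  # camera center in world coordinates
    P = K @ np.concatenate([R, (-R @ C)[:, None]], axis=1)  # 3x4 projection matrix
    intrinsics, pose = load_K_Rt_from_P(None, P)
    assert np.allclose(intrinsics[:3, :3], K, atol=1e-3)
    assert np.allclose(pose[:3, :3], R.T, atol=1e-3)  # camera-to-world rotation
    assert np.allclose(pose[:3, 3], C, atol=1e-3)     # recovered camera center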


def filter_images_via_pixel_values(data_dir):
    """Rank frames by the fraction of pixels whose summed RGB value exceeds a
    threshold and print the top quarter as candidate views."""
    images_lis = sorted(glob(os.path.join(data_dir, 'image/*.png')))
    n_images = len(images_lis)
    images_np = np.stack([cv.imread(im_name) for im_name in images_lis]) / 255.0
    print(f"images_np: {images_np.shape}")  # n_frames x res x res x 3
    # images_np = 1. - images_np
    has_density_values = (np.sum(images_np, axis=-1) > 0.7).astype(np.float32)
    has_density_values = np.sum(np.sum(has_density_values, axis=-1), axis=-1)
    tot_res_nns = float(images_np.shape[1] * images_np.shape[2])
    has_density_ratio = has_density_values / tot_res_nns  # per-frame fraction of "occupied" pixels
    # print(f"has_density_values: {has_density_values.shape}")
    paired_has_density_ratio_list = [(i_fr, has_density_ratio[i_fr].item()) for i_fr in range(has_density_ratio.shape[0])]
    paired_has_density_ratio_list = sorted(paired_has_density_ratio_list, key=lambda ii: ii[1], reverse=True)
    mid_rnk_value = len(paired_has_density_ratio_list) // 4
    print("mid value of the density ratio")
    print(paired_has_density_ratio_list[mid_rnk_value])
    image_idx = paired_has_density_ratio_list[mid_rnk_value][0]
    print(f"image idx: {images_lis[image_idx]}")
    print(paired_has_density_ratio_list[:mid_rnk_value])
    tot_selected_img_idx_list = [ii[0] for ii in paired_has_density_ratio_list[:mid_rnk_value]]
    tot_selected_img_idx_list = sorted(tot_selected_img_idx_list)
    print(len(tot_selected_img_idx_list))
    # print(tot_selected_img_idx_list[54])
    print(tot_selected_img_idx_list)
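

# Toy check of the density-ratio logic above (a sketch on fabricated pixels, not
# real data; `_density_ratio_demo` is a hypothetical name): one all-white frame
# and one all-black frame.
def _density_ratio_demo():
    images = np.stack([np.ones((4, 4, 3)), np.zeros((4, 4, 3))])  # 2 frames, 4x4 RGB
    has_density = (np.sum(images, axis=-1) > 0.7).astype(np.float32)  # per-pixel occupancy
    ratio = np.sum(has_density, axis=(-1, -2)) / float(4 * 4)  # per-frame occupied fraction
    print(ratio)  # [1. 0.]: the white frame is fully "dense", the black frame is empty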


class Dataset:
    def __init__(self, conf, time_idx, mode='train'):
        super(Dataset, self).__init__()
        print('Load data: Begin')
        self.device = torch.device('cuda')
        self.conf = conf

        # Hand-picked view indices with enough foreground coverage; successive
        # refinements of the list are kept for reference, and the last
        # assignment wins.
        self.selected_img_idxes_list = [0, 1, 5, 6, 7, 8, 9, 13, 14, 15, 35, 36, 42, 43, 44, 48, 49, 50, 51, 55, 56, 57, 61, 62, 63, 69, 84, 90, 91, 92, 96, 97]
        # self.selected_img_idxes_list = [0, 1, 5, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 28, 29, 35, 36, 37, 40, 41, 70, 71, 79, 82, 83, 84, 85, 92, 93, 96, 97, 98, 99, 105, 106, 107, 110, 111, 112, 113, 118, 119, 120, 121, 124, 125, 133, 134, 135, 139, 174, 175, 176, 177, 180, 188, 189, 190, 191, 194, 195]
        self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 36, 40, 41, 70, 71, 78, 82, 83, 84, 85, 90, 91, 92, 93, 96, 97]
        self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 15, 20, 21, 22, 23, 26, 27, 36, 40, 41, 70, 71, 78, 82, 83, 84, 85, 90, 91, 92, 93, 96, 97, 98, 99, 104, 105, 106, 107, 110, 111, 112, 113, 118, 119, 120, 121, 124, 125, 134, 135, 139, 174, 175, 176, 177, 180, 181, 182, 183, 188, 189, 190, 191, 194, 195]
        # self.selected_img_idxes_list = [0, 1, 6, 7, 8, 9, 12, 13, 14, 20, 21, 22, 23, 26, 27, 70, 78, 83, 84, 85, 91, 92, 93, 96, 97, 98, 99, 105, 106, 107, 110, 111, 112, 113, 119, 120, 121, 124, 125, 175, 176, 181, 182, 188, 189, 190, 191, 194, 195]
        self.selected_img_idxes = np.array(self.selected_img_idxes_list).astype(np.int32)

        self.data_dir = conf.get_string('data_dir')
        self.data_dir = os.path.join(self.data_dir, f"{time_idx}")  # per-timestep subdirectory
        self.render_cameras_name = conf.get_string('render_cameras_name')
        self.object_cameras_name = conf.get_string('object_cameras_name')

        self.camera_outside_sphere = conf.get_bool('camera_outside_sphere', default=True)
        self.scale_mat_scale = conf.get_float('scale_mat_scale', default=1.1)

        camera_dict = np.load(os.path.join(self.data_dir, self.render_cameras_name))
        # camera_dict = np.load("/home/xueyi/diffsim/NeuS/public_data/dtu_scan24/cameras_sphere.npz")
        self.camera_dict = camera_dict

        # number of pixels in the views -> very thin geometry is not useful
        self.images_lis = sorted(glob(os.path.join(self.data_dir, 'image/*.png')))
        # self.images_lis = self.images_lis[:1]
        self.n_images = len(self.images_lis)

        if mode == 'train_from_model_rules':
            # Load a single image and tile it across all views.
            self.images_np = cv.imread(self.images_lis[0]) / 256.0
            print(self.images_np.shape)
            self.images_np = np.reshape(self.images_np, (1, self.images_np.shape[0], self.images_np.shape[1], self.images_np.shape[2]))
            self.images_np = [self.images_np for _ in range(len(self.images_lis))]
            self.images_np = np.concatenate(self.images_np, axis=0)
        else:
            presaved_images_npy_fn = os.path.join(self.data_dir, "processed_images.npy")
            if not os.path.exists(presaved_images_npy_fn):
                self.images_np = []
                for i_im_idx, im_name in enumerate(self.images_lis):
                    print(f"loading {i_im_idx} / {len(self.images_lis)}")
                    cur_im = cv.imread(im_name)
                    self.images_np.append(cur_im)
                self.images_np = np.stack(self.images_np) / 256.0
                np.save(presaved_images_npy_fn, self.images_np)
            else:
                print(f"Loading from {presaved_images_npy_fn}")
                self.images_np = np.load(presaved_images_npy_fn, allow_pickle=True)
            # self.images_np = np.stack([cv.imread(im_name) for im_name in self.images_lis]) / 255.0
            self.images_np = self.images_np[self.selected_img_idxes]  # keep only the selected views
        self.images_np = 1. - self.images_np  # invert pixel values so the light background becomes dark

        self.masks_lis = sorted(glob(os.path.join(self.data_dir, 'mask/*.png')))
        if mode == 'train_from_model_rules':
            self.masks_np = cv.imread(self.masks_lis[0]) / 256.0
            print("masks shape:", self.masks_np.shape)
            self.masks_np = np.reshape(self.masks_np, (1, self.masks_np.shape[0], self.masks_np.shape[1], self.masks_np.shape[2]))  # .repeat(len(self.masks_lis), 1, 1)
            self.masks_np = [self.masks_np for _ in range(len(self.masks_lis))]
            self.masks_np = np.concatenate(self.masks_np, axis=0)
        else:
            presaved_masks_npy_fn = os.path.join(self.data_dir, "processed_masks.npy")
            # self.masks_lis = self.masks_lis[:1]
            if not os.path.exists(presaved_masks_npy_fn):
                try:
                    self.masks_np = np.stack([cv.imread(im_name) for im_name in self.masks_lis]) / 256.0
                    self.masks_np = self.masks_np[self.selected_img_idxes]
                except:
                    # No mask files: fall back to the (inverted) images as masks.
                    self.masks_np = self.images_np.copy()
                np.save(presaved_masks_npy_fn, self.masks_np)
            else:
                print(f"Loading from {presaved_masks_npy_fn}")
                self.masks_np = np.load(presaved_masks_npy_fn, allow_pickle=True)

        # world_mat is a projection matrix from world to image
        self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]

        # scale_mat: used for coordinate normalization, we assume the scene to render is inside a unit sphere at origin.
        self.scale_mats_np = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(self.n_images)]

        self.intrinsics_all = []
        self.pose_all = []

        # for idx, (scale_mat, world_mat) in enumerate(zip(self.scale_mats_np, self.world_mats_np)):
        for idx in self.selected_img_idxes_list:
            scale_mat = self.scale_mats_np[idx]
            world_mat = self.world_mats_np[idx]
            if "hand" in self.data_dir:
                # Synthetic hand data: a fixed 512x512 pinhole camera whose pose is stored directly.
                intrinsics = np.eye(4)
                fov = 512. / 2.  # * 2
                res = 512.
                intrinsics[:3, :3] = np.array([
                    [fov, 0, 0.5 * res],
                    [0, fov, 0.5 * res],
                    [0, 0, 1]
                ], dtype=np.float32)
                pose = camera_dict['camera_mat_%d' % idx].astype(np.float32)
            else:
                P = world_mat @ scale_mat
                P = P[:3, :4]
                intrinsics, pose = load_K_Rt_from_P(None, P)
            self.intrinsics_all.append(torch.from_numpy(intrinsics).float())
            self.pose_all.append(torch.from_numpy(pose).float())

        self.images = torch.from_numpy(self.images_np.astype(np.float32)).cpu()  # [n_images, H, W, 3]
        self.masks = torch.from_numpy(self.masks_np.astype(np.float32)).cpu()    # [n_images, H, W, 3]
        self.intrinsics_all = torch.stack(self.intrinsics_all).to(self.device)   # [n_images, 4, 4]
        self.intrinsics_all_inv = torch.inverse(self.intrinsics_all)             # [n_images, 4, 4]
        self.focal = self.intrinsics_all[0][0, 0]
        self.pose_all = torch.stack(self.pose_all).to(self.device)               # [n_images, 4, 4]
        self.H, self.W = self.images.shape[1], self.images.shape[2]
        self.image_pixels = self.H * self.W

        object_bbox_min = np.array([-1.01, -1.01, -1.01, 1.0])
        object_bbox_max = np.array([ 1.01,  1.01,  1.01, 1.0])
        # Object scale mat: region of interest to **extract mesh**
        object_scale_mat = np.load(os.path.join(self.data_dir, self.object_cameras_name))['scale_mat_0']
        object_bbox_min = np.linalg.inv(self.scale_mats_np[0]) @ object_scale_mat @ object_bbox_min[:, None]
        object_bbox_max = np.linalg.inv(self.scale_mats_np[0]) @ object_scale_mat @ object_bbox_max[:, None]
        self.object_bbox_min = object_bbox_min[:3, 0]
        self.object_bbox_max = object_bbox_max[:3, 0]

        self.n_images = self.images.size(0)

        print('Load data: End')
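
    # Construction sketch (hypothetical paths, assuming the pyhocon-style config
    # implied by the conf.get_string/get_bool/get_float calls above):
    #
    #   from pyhocon import ConfigFactory
    #   conf = ConfigFactory.parse_string('''
    #       data_dir = ./public_data/hand_test_routine_2_light_color
    #       render_cameras_name = cameras_sphere.npz
    #       object_cameras_name = cameras_sphere.npz
    #   ''')
    #   dataset = Dataset(conf, time_idx=0)  # loads <data_dir>/0/image/*.png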

    # DVGO-style ray generation from a camera-to-world matrix; it uses no
    # instance state, so it is kept as a static helper.
    @staticmethod
    def get_rays(H, W, K, c2w, inverse_y, flip_x, flip_y, mode='center'):
        i, j = torch.meshgrid(
            torch.linspace(0, W - 1, W, device=c2w.device),
            torch.linspace(0, H - 1, H, device=c2w.device))
        i = i.t().float()
        j = j.t().float()
        if mode == 'lefttop':
            pass
        elif mode == 'center':
            i, j = i + 0.5, j + 0.5
        elif mode == 'random':
            i = i + torch.rand_like(i)
            j = j + torch.rand_like(j)
        else:
            raise NotImplementedError

        if flip_x:
            i = i.flip((1,))
        if flip_y:
            j = j.flip((0,))
        if inverse_y:
            dirs = torch.stack([(i - K[0][2]) / K[0][0], (j - K[1][2]) / K[1][1], torch.ones_like(i)], -1)
        else:
            dirs = torch.stack([(i - K[0][2]) / K[0][0], -(j - K[1][2]) / K[1][1], -torch.ones_like(i)], -1)
        # Rotate ray directions from camera frame to the world frame
        rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3, :3], -1)  # dot product, equals to: [c2w.dot(dir) for dir in dirs]
        # Translate camera frame's origin to the world frame. It is the origin of all rays.
        rays_o = c2w[:3, 3].expand(rays_d.shape)
        return rays_o, rays_d

    def gen_rays_at(self, img_idx, resolution_level=1):
        """
        Generate rays at world space from one camera.
        """
        l = resolution_level
        tx = torch.linspace(0, self.W - 1, self.W // l)
        ty = torch.linspace(0, self.H - 1, self.H // l)
        pixels_x, pixels_y = torch.meshgrid(tx, ty)

        ##### previous method #####
        # p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1)  # W, H, 3
        # p = torch.stack([pixels_x, pixels_y, -1. * torch.ones_like(pixels_y)], dim=-1)  # W, H, 3
        # p = torch.matmul(self.intrinsics_all_inv[img_idx, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3
        # rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        # rays_v = torch.matmul(self.pose_all[img_idx, None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3
        # rays_o = self.pose_all[img_idx, None, None, :3, 3].expand(rays_v.shape)  # W, H, 3
        ##### previous method #####

        # Hard-coded 512x512 pinhole intrinsics (focal length 512).
        fov = 512.
        res = 512.
        K = np.array([
            [fov, 0, 0.5 * res],
            [0, fov, 0.5 * res],
            [0, 0, 1]
        ], dtype=np.float32)
        K = torch.from_numpy(K).float().cuda()

        # `center` mode: shoot rays through pixel centers.
        c2w = self.pose_all[img_idx]
        pixels_x, pixels_y = pixels_x + 0.5, pixels_y + 0.5
        dirs = torch.stack([(pixels_x - K[0][2]) / K[0][0], -(pixels_y - K[1][2]) / K[1][1], -torch.ones_like(pixels_x)], -1)
        rays_v = torch.sum(dirs[..., np.newaxis, :] * c2w[:3, :3], -1)
        rays_o = c2w[:3, 3].expand(rays_v.shape)
        return rays_o.transpose(0, 1), rays_v.transpose(0, 1)
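
    # Usage sketch (assuming a constructed `dataset`): quarter-resolution rays
    # for view 0. With the pinhole model above, a pixel at the principal point
    # (cx, cy) maps to dir = (0, 0, -1), and one focal length to the right,
    # (cx + f, cy), maps to the 45-degree ray (1, 0, -1); directions are not
    # normalized on this K-based path.
    #
    #   rays_o, rays_v = dataset.gen_rays_at(0, resolution_level=4)
    #   # rays_o, rays_v: [H // 4, W // 4, 3] origins and directions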

    def gen_random_rays_at(self, img_idx, batch_size):
        """
        Generate random rays at world space from one camera.
        """
        img_idx = img_idx.cpu()
        pixels_x = torch.randint(low=0, high=self.W, size=[batch_size]).cpu()
        pixels_y = torch.randint(low=0, high=self.H, size=[batch_size]).cpu()
        # print(self.images.device, img_idx.device, pixels_y.device)
        color = self.images[img_idx][(pixels_y, pixels_x)]  # batch_size, 3
        mask = self.masks[img_idx][(pixels_y, pixels_x)]    # batch_size, 3

        ##### previous method #####
        # p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).float()  # batch_size, 3
        # p = torch.stack([pixels_x, pixels_y, -1. * torch.ones_like(pixels_y)], dim=-1).float()  # batch_size, 3
        # p = torch.matmul(self.intrinsics_all_inv[img_idx, None, :3, :3], p[:, :, None]).squeeze()  # batch_size, 3
        # rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # batch_size, 3
        # rays_v = torch.matmul(self.pose_all[img_idx, None, :3, :3], rays_v[:, :, None]).squeeze()  # batch_size, 3
        # rays_o = self.pose_all[img_idx, None, :3, 3].expand(rays_v.shape)  # batch_size, 3
        ##### previous method #####

        # Hard-coded 512x512 pinhole intrinsics (focal length 512).
        fov = 512.
        res = 512.
        K = np.array([
            [fov, 0, 0.5 * res],
            [0, fov, 0.5 * res],
            [0, 0, 1]
        ], dtype=np.float32)
        K = torch.from_numpy(K).float().cuda()

        # `center` mode: shoot rays through pixel centers.
        c2w = self.pose_all[img_idx]
        pixels_x = pixels_x.cuda()
        pixels_y = pixels_y.cuda()
        pixels_x, pixels_y = pixels_x + 0.5, pixels_y + 0.5
        dirs = torch.stack([(pixels_x - K[0][2]) / K[0][0], -(pixels_y - K[1][2]) / K[1][1], -torch.ones_like(pixels_x)], -1)
        rays_v = torch.sum(dirs[..., np.newaxis, :] * c2w[:3, :3], -1)
        rays_o = c2w[:3, 3].expand(rays_v.shape)
        return torch.cat([rays_o.cpu(), rays_v.cpu(), color, mask[:, :1]], dim=-1).cuda()  # batch_size, 10

    def gen_rays_between(self, idx_0, idx_1, ratio, resolution_level=1):
        """
        Interpolate pose between two cameras.
        """
        l = resolution_level
        tx = torch.linspace(0, self.W - 1, self.W // l)
        ty = torch.linspace(0, self.H - 1, self.H // l)
        pixels_x, pixels_y = torch.meshgrid(tx, ty)
        p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1)  # W, H, 3
        p = torch.matmul(self.intrinsics_all_inv[0, None, None, :3, :3], p[:, :, :, None]).squeeze()  # W, H, 3
        rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # W, H, 3
        trans = self.pose_all[idx_0, :3, 3] * (1.0 - ratio) + self.pose_all[idx_1, :3, 3] * ratio  # (recomputed below from the interpolated pose)
        pose_0 = self.pose_all[idx_0].detach().cpu().numpy()
        pose_1 = self.pose_all[idx_1].detach().cpu().numpy()
        pose_0 = np.linalg.inv(pose_0)
        pose_1 = np.linalg.inv(pose_1)
        rot_0 = pose_0[:3, :3]
        rot_1 = pose_1[:3, :3]
        # Spherical interpolation of the rotations, linear interpolation of the translations.
        rots = Rot.from_matrix(np.stack([rot_0, rot_1]))
        key_times = [0, 1]
        slerp = Slerp(key_times, rots)
        rot = slerp(ratio)
        pose = np.diag([1.0, 1.0, 1.0, 1.0])
        pose = pose.astype(np.float32)
        pose[:3, :3] = rot.as_matrix()
        pose[:3, 3] = ((1.0 - ratio) * pose_0 + ratio * pose_1)[:3, 3]
        pose = np.linalg.inv(pose)
        rot = torch.from_numpy(pose[:3, :3]).cuda()
        trans = torch.from_numpy(pose[:3, 3]).cuda()
        rays_v = torch.matmul(rot[None, None, :3, :3], rays_v[:, :, :, None]).squeeze()  # W, H, 3
        rays_o = trans[None, None, :3].expand(rays_v.shape)  # W, H, 3
        return rays_o.transpose(0, 1), rays_v.transpose(0, 1)

    def near_far_from_sphere(self, rays_o, rays_d):
        # |rays_o + t * rays_d|^2 = a t^2 + b t + |rays_o|^2 is minimized at
        # t = -b / (2a), the point on the ray closest to the origin; sample one
        # unit on either side of it.
        a = torch.sum(rays_d**2, dim=-1, keepdim=True)
        b = 2.0 * torch.sum(rays_o * rays_d, dim=-1, keepdim=True)
        mid = 0.5 * (-b) / a
        near = mid - 1.0
        far = mid + 1.0
        return near, far

    def image_at(self, idx, resolution_level):
        if self.selected_img_idxes_list is not None:
            img = cv.imread(self.images_lis[self.selected_img_idxes_list[idx]])
        else:
            img = cv.imread(self.images_lis[idx])
        return (cv.resize(img, (self.W // resolution_level, self.H // resolution_level))).clip(0, 255)


if __name__ == '__main__':
    # Successive data-root overrides; only the last assignment is used.
    data_dir = "/data/datasets/genn/diffsim/diffredmax/save_res/goal_optimize_model_hand_sphere_test_obj_type_active_nfr_10_view_divide_0.5_n_views_7_three_planes_False_recon_dvgo_new_Nposes_7_routine_2"
    data_dir = "/data/datasets/genn/diffsim/neus/public_data/hand_test"
    data_dir = "/data2/datasets/diffsim/neus/public_data/hand_test_routine_2"
    data_dir = "/data2/datasets/diffsim/neus/public_data/hand_test_routine_2_light_color"
    filter_images_via_pixel_values(data_dir=data_dir)
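
# Verification sketch for near_far_from_sphere (fabricated ray, for intuition
# only): with rays_o = (0, 0, 2) and rays_d = (0, 0, -1), a = 1 and b = -4, so
# mid = 2 and the returned band is [near, far] = [1, 3]. The exact unit-sphere
# intersections solve t = mid +- sqrt(mid^2 - (|rays_o|^2 - 1) / a) = 2 +- 1,
# i.e. the same [1, 3] for this ray through the sphere's center.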