diff --git a/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf b/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf deleted file mode 100644 index dacbc09968c2f4cd6f7348dd93552ea5d8876236..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf +++ /dev/null @@ -1,137 +0,0 @@ -# - for the lod1 geometry network, using adaptive cost for sparse cost regularization network -#- for lod1 rendering network, using depth-adaptive render - -general { - base_exp_dir = ./exp/val/1_4_only_narrow_lod1 - - recording = [ - ./, - ./data - ./ops - ./models - ./loss - ] -} - -dataset { - # local path - trainpath = /objaverse-processed/zero12345_img/eval_selected - valpath = /objaverse-processed/zero12345_img/eval_selected - testpath = /objaverse-processed/zero12345_img/eval_selected - # trainpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - # valpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - # testpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - imgScale_train = 1.0 - imgScale_test = 1.0 - nviews = 5 - clean_image = True - importance_sample = True - test_ref_views = [23] - - # test dataset - test_n_views = 2 - test_img_wh = [256, 256] - test_clip_wh = [0, 0] - test_scan_id = scan110 - train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] # - test_img_idx = [51, 55, 57] #[32, 33, 34] # - - test_dir_comment = train -} - -train { - learning_rate = 2e-4 - learning_rate_milestone = [100000, 150000, 200000] - learning_rate_factor = 0.5 - end_iter = 200000 - save_freq = 5000 - val_freq = 1 - val_mesh_freq =1 - report_freq = 100 - - N_rays = 512 - - validate_resolution_level = 4 - anneal_start = 0 - anneal_end = 25000 - anneal_start_lod1 = 0 - anneal_end_lod1 = 15000 - - use_white_bkgd = True - - # Loss - # ! 
for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization - sdf_igr_weight = 0.1 - sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network - sdf_decay_param = 100 # cannot be too large, which decide the tsdf range - fg_bg_weight = 0.01 # first 0.01 - bg_ratio = 0.3 - - if_fix_lod0_networks = True -} - -model { - num_lods = 2 - - sdf_network_lod0 { - lod = 0, - ch_in = 56, # the channel num of fused pyramid features - voxel_size = 0.02105263, # 0.02083333, should be 2/95 - vol_dims = [96, 96, 96], - hidden_dim = 128, - cost_type = variance_mean - d_pyramid_feature_compress = 16, - regnet_d_out = 16, - num_sdf_layers = 4, - # position embedding - multires = 6 - } - - - sdf_network_lod1 { - lod = 1, - ch_in = 56, # the channel num of fused pyramid features - voxel_size = 0.0104712, #0.01041667, should be 2/191 - vol_dims = [192, 192, 192], - hidden_dim = 128, - cost_type = variance_mean - d_pyramid_feature_compress = 8, - regnet_d_out = 8, - num_sdf_layers = 4, - # position embedding - multires = 6 - } - - - variance_network { - init_val = 0.2 - } - - variance_network_lod1 { - init_val = 0.2 - } - - rendering_network { - in_geometry_feat_ch = 16 - in_rendering_feat_ch = 56 - anti_alias_pooling = True - } - - rendering_network_lod1 { - in_geometry_feat_ch = 8 - in_rendering_feat_ch = 56 - anti_alias_pooling = True - - } - - - trainer { - n_samples_lod0 = 64 - n_importance_lod0 = 64 - n_samples_lod1 = 64 - n_importance_lod1 = 64 - n_outside = 0 # 128 if render_outside_uniform_sampling - perturb = 1.0 - alpha_type = div - } -} diff --git a/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf b/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf index 7be6d4098d66473f63252c42d0a1bd25e2338a6b..e591ac038b854140efc81cdad3c8dc7838f03a83 100644 --- a/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf +++ b/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf @@ -18,8 +18,6 @@ dataset { valpath = ../ # !!! where you store the validation data testpath = ../ - - imgScale_train = 1.0 imgScale_test = 1.0 nviews = 5 diff --git a/SparseNeuS_demo_v1/data/__init__.py b/SparseNeuS_demo_v1/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/SparseNeuS_demo_v1/data/blender.py b/SparseNeuS_demo_v1/data/blender.py deleted file mode 100644 index c027f3e05367497c91026b362af4378fe31ff24a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender.py +++ /dev/null @@ -1,340 +0,0 @@ -import torch -from torch.utils.data import Dataset -import json -import numpy as np -import os -from PIL import Image -from torchvision import transforms as T -from kornia import create_meshgrid -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import cv2 as cv -from data.scene import get_boundingbox - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def get_rays(directions, c2w): - """ - Get ray origin and normalized directions in world coordinate for all pixels in one image. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - directions: (H, W, 3) precomputed ray directions in camera coordinate - c2w: (3, 4) transformation matrix from camera coordinate to world coordinate - Outputs: - rays_o: (H*W, 3), the origin of the rays in world coordinate - rays_d: (H*W, 3), the normalized direction of the rays in world coordinate - """ - # Rotate ray directions from camera coordinate to the world coordinate - rays_d = directions @ c2w[:3, :3].T # (H, W, 3) - # rays_d = rays_d / torch.norm(rays_d, dim=-1, keepdim=True) - # The origin of all rays is the camera origin in world coordinate - rays_o = c2w[:3, 3].expand(rays_d.shape) # (H, W, 3) - - rays_d = rays_d.view(-1, 3) - rays_o = rays_o.view(-1, 3) - - return rays_o, rays_d - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -class BlenderDataset(Dataset): - def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[], - img_wh=[800, 800], clip_wh=[0, 0], original_img_wh=[800, 800], - N_rays=512, h_patch_size=5, near=2.0, far=6.0): - self.root_dir = root_dir - self.split = split - self.img_wh = img_wh - self.clip_wh = clip_wh - self.define_transforms() - self.train_img_idx = train_img_idx - self.test_img_idx = test_img_idx - self.N_rays = N_rays - self.h_patch_size = h_patch_size # used to extract patch for supervision - self.n_views = n_views - self.near, self.far = near, far - self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) - - with open(os.path.join(self.root_dir, f"transforms_{self.split}.json"), 'r') as f: - self.meta = json.load(f) - - - self.read_meta(near, far) - # import ipdb; ipdb.set_trace() - self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.meta['frames']))]) - - - # ! 
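Side note: `load_K_Rt_from_P` (above, and duplicated in every loader deleted below) recovers the intrinsics and a cam2world pose from a 3x4 projection matrix via `cv2.decomposeProjectionMatrix`. A minimal round-trip sketch, using only numpy/OpenCV and made-up K/R/t values, illustrating why the returned pose equals the inverse of the world-to-camera matrix:

```python
import numpy as np
import cv2

# Build a synthetic projection matrix P = K [R | t] and check that
# cv2.decomposeProjectionMatrix recovers K and the cam2world pose,
# mirroring what load_K_Rt_from_P returns.  All values are made up.
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
angle = np.deg2rad(30.0)
R = np.array([[np.cos(angle), -np.sin(angle), 0.0],
              [np.sin(angle),  np.cos(angle), 0.0],
              [0.0, 0.0, 1.0]])          # world-to-camera rotation
t = np.array([[0.1], [-0.2], [2.0]])     # world-to-camera translation
P = K @ np.hstack([R, t])                # 3x4 projection matrix

out = cv2.decomposeProjectionMatrix(P)
K_rec, R_rec, t_rec = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]

pose = np.eye(4)
pose[:3, :3] = R_rec.T                      # cam2world rotation
pose[:3, 3] = (t_rec[:3] / t_rec[3])[:, 0]  # camera center in world coords

# The recovered pose should match the inverse of [R | t].
w2c = np.eye(4)
w2c[:3, :3] = R
w2c[:3, 3] = t[:, 0]
assert np.allclose(pose, np.linalg.inv(w2c), atol=1e-6)
assert np.allclose(K_rec, K, atol=1e-6)
```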
estimate scale_mat - self.scale_mat, self.scale_factor = self.cal_scale_mat( - img_hw=[self.img_wh[1], self.img_wh[0]], - intrinsics=self.all_intrinsics[self.train_img_idx], - extrinsics=self.all_w2cs[self.train_img_idx], - near_fars=self.raw_near_fars[self.train_img_idx], - factor=1.1) - # self.scale_mat = np.eye(4) - # self.scale_factor = 1.0 - # import ipdb; ipdb.set_trace() - # * after scaling and translation, unit bounding box - self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \ - self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info() - - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - self.white_back = True - - def read_meta(self, near=2.0, far=6.0): - - - self.ref_img_idx = self.train_img_idx[0] - ref_c2w = np.array(self.meta['frames'][self.ref_img_idx]['transform_matrix']) @ self.blender2opencv - # ref_c2w = torch.FloatTensor(ref_c2w) - self.ref_c2w = ref_c2w - self.ref_w2c = np.linalg.inv(ref_c2w) - - - w, h = self.img_wh - self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x']) # original focal length - self.focal *= self.img_wh[0] / 800 # modify focal length to match size self.img_wh - - # bounds, common for all scenes - self.near = near - self.far = far - self.bounds = np.array([self.near, self.far]) - - # ray directions for all pixels, same for all images (same H, W, focal) - self.directions = get_ray_directions(h, w, [self.focal,self.focal]) # (h, w, 3) - intrinsics = np.eye(4) - intrinsics[:3, :3] = np.array([[self.focal,0,w/2],[0,self.focal,h/2],[0,0,1]]).astype(np.float32) - self.intrinsics = intrinsics - - self.image_paths = [] - self.poses = [] - self.all_rays = [] - self.all_images = [] - self.all_masks = [] - self.all_w2cs = [] - self.all_intrinsics = [] - for frame in self.meta['frames']: - pose = np.array(frame['transform_matrix']) @ self.blender2opencv - self.poses += [pose] - c2w = torch.FloatTensor(pose) - w2c = np.linalg.inv(c2w) - image_path = os.path.join(self.root_dir, f"{frame['file_path']}.png") - self.image_paths += [image_path] - img = Image.open(image_path) - img = img.resize(self.img_wh, Image.LANCZOS) - img = self.transform(img) # (4, h, w) - - self.all_masks += [img[-1:,:]>0] - # img = img[:3, :] * img[ -1:,:] + (1 - img[-1:, :]) # blend A to RGB - img = img[:3, :] * img[ -1:,:] - img = img.numpy() # (3, h, w) - self.all_images += [img] - - - self.all_masks += [] - self.all_intrinsics.append(self.intrinsics) - # - transform from world system to ref-camera system - self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c)) - - self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32) - self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32) - self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32) - # self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2], - # self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]] - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def scale_cam_info(self): - new_intrinsics = [] - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - for idx in range(len(self.all_images)): - - intrinsics = self.all_intrinsics[idx] - # import ipdb; ipdb.set_trace() - P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat - P = P.cpu().numpy()[:3, :4] - - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - new_intrinsics.append(intrinsics) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \ - np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \ - np.stack(new_affine_mats), np.stack(new_near_fars) - - new_intrinsics = torch.from_numpy(np.float32(new_intrinsics)) - new_w2cs = torch.from_numpy(np.float32(new_w2cs)) - new_c2ws = torch.from_numpy(np.float32(new_c2ws)) - new_affine_mats = torch.from_numpy(np.float32(new_affine_mats)) - new_near_fars = torch.from_numpy(np.float32(new_near_fars)) - - return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars - - def load_poses_all(self, file=f"transforms_train.json"): - with open(os.path.join(self.root_dir, file), 'r') as f: - meta = json.load(f) - - c2ws = [] - for i,frame in enumerate(meta['frames']): - c2ws.append(np.array(frame['transform_matrix']) @ self.blender2opencv) - return np.stack(c2ws) - - def define_transforms(self): - self.transform = T.ToTensor() - - - - def get_conditional_sample(self): - sample = {} - support_idxs = self.train_img_idx - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4) - sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2) - sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space - - # sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - - return sample - - - - def __len__(self): - if self.split == 'train': - return self.n_views * 1000 - else: - return len(self.test_img_idx) * 1000 - - - def __getitem__(self, idx): - sample = {} - - if self.split == 'train': - render_idx = self.train_img_idx[idx % self.n_views] - support_idxs = [idx for idx in self.train_img_idx if idx != render_idx] - else: - # render_idx = idx % self.n_test_images + self.n_train_images - render_idx = self.test_img_idx[idx % len(self.test_img_idx)] - support_idxs = [render_idx] - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4) - sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! 
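The `affine_mat` built in `scale_cam_info()` stacks `K[:3,:3] @ w2c[:3,:4]`, i.e. a full 3x4 projection matrix per view; presumably it is what the model uses to gather source-view features. A tiny numeric sketch (hypothetical K and pose) of how it maps a world point to pixel coordinates:

```python
import numpy as np

# Hypothetical numbers: project a world point with the per-view
# affine_mat = K[:3,:3] @ w2c[:3,:4] built in scale_cam_info().
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 2.0]               # camera 2 units from the origin

affine_mat = np.eye(4)
affine_mat[:3, :4] = K[:3, :3] @ w2c[:3, :4]

X_world = np.array([0.1, -0.1, 0.0, 1.0])  # homogeneous world point
uvw = affine_mat[:3, :4] @ X_world
u, v = uvw[:2] / uvw[2]                    # perspective divide -> pixel coords
print(u, v)                                # 142.0, 114.0 (near the principal point)
```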
in world space - # sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['img_index'] = torch.tensor(render_idx) - - # - query image - sample['query_image'] = self.all_images[render_idx] - sample['query_c2w'] = self.scaled_c2ws[render_idx] - sample['query_w2c'] = self.scaled_w2cs[render_idx] - sample['query_intrinsic'] = self.scaled_intrinsics[render_idx] - sample['query_near_far'] = self.scaled_near_fars[render_idx] - # sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx]) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx] - sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32)) - - # - generate rays - if self.split == 'val' or self.split == 'test': - sample_rays = gen_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None) - else: - sample_rays = gen_random_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None, - dilated_mask=None, - importance_sample=False, - h_patch_size=self.h_patch_size - ) - - sample['rays'] = sample_rays - - return sample \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/blender_general.py b/SparseNeuS_demo_v1/data/blender_general.py deleted file mode 100644 index 871bcd6e9e2542110213e34ac5e7bde97184d938..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general.py +++ /dev/null @@ -1,432 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - depth_h = cv2.imread(filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 65535 * 1.4 + 0.5 - - depth_h[depth_h < near_bound+1e-3] = 0.0 - - depth = {} - for l in range(3): - depth[f"level_{l}"] = cv2.resize( - depth_h, - None, - fx=1.0 / (2**l), - fy=1.0 / (2**l), - interpolation=cv2.INTER_NEAREST, - ) - - if self.split == "train": - cutout = np.ones_like(depth[f"level_2"]) - h0 = int(np.random.randint(0, high=cutout.shape[0] // 5, size=1)) - h1 = int( - np.random.randint( - 4 * cutout.shape[0] // 5, high=cutout.shape[0], size=1 - ) - ) - w0 = int(np.random.randint(0, high=cutout.shape[1] // 5, size=1)) - w1 = int( - np.random.randint( - 4 * cutout.shape[1] // 5, high=cutout.shape[1], size=1 - ) - ) - cutout[h0:h1, w0:w1] = 0 - depth_aug = depth[f"level_2"] * cutout - else: - depth_aug = depth[f"level_2"].copy() - - return depth, depth_h, depth_aug - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 8 - # uid = 'c40d63d5d740405e91c7f5fce855076e' - # folder_id = '000-123' - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - src_views = range(8+idx*4, 8+(idx+1)*4) - - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
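The `__getitem__` above converts the stored pinhole z-depth (`view_*_depth_mm.png`, millimetres divided by 1000) into Euclidean distance along each ray before using it as supervision, by scaling the unit-z ray directions. A numpy-only sketch of that conversion with a fake depth map and hypothetical intrinsics:

```python
import numpy as np

# Convert a pinhole z-depth map (depth along the optical axis) into
# per-ray Euclidean distance, as the loader does before supervision.
H, W = 4, 4
fx = fy = 280.0                                    # hypothetical focal length
cx, cy = W / 2, H / 2
z_depth = np.full((H, W), 1.5, dtype=np.float32)   # fake constant z-depth

j, i = np.meshgrid(np.arange(H) + 0.5, np.arange(W) + 0.5, indexing="ij")
dirs = np.stack([(i - cx) / fx, (j - cy) / fy, np.ones_like(i)], axis=-1)

# distance along the ray = z-depth * ||direction||  (direction has unit z)
ray_distance = z_depth * np.linalg.norm(dirs, axis=-1)
assert (ray_distance >= z_depth).all()             # distance only grows off-axis
```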
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
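`cal_scale_mat()` returns `scale_mat = diag(r, r, r, 1)` with the scene center as translation and `scale_factor = 1/r`, so composing it into `P = intrinsic @ extrinsic @ scale_mat` re-expresses every camera in a frame where the estimated bounding sphere becomes the unit sphere; the depth maps are rescaled by the same factor. A small numeric check with a made-up center and radius:

```python
import numpy as np

# scale_mat maps normalized (unit-sphere) coordinates back to the original
# world frame; its inverse sends a point on the bounding sphere to the
# unit sphere.  Center and radius below are illustrative only.
center = np.array([0.2, -0.1, 0.3])
radius = 1.6
scale_mat = np.diag([radius, radius, radius, 1.0])
scale_mat[:3, 3] = center
scale_factor = 1.0 / radius

p_world = center + np.array([radius, 0.0, 0.0])    # point on the bounding sphere
p_norm = np.linalg.inv(scale_mat) @ np.append(p_world, 1.0)
print(np.linalg.norm(p_norm[:3]))                  # -> 1.0 (up to float error)

# Baking scale_mat into P = K @ w2c @ scale_mat is why the loader also
# multiplies the depth maps by scale_factor afterwards.
```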
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_12_narrow.py b/SparseNeuS_demo_v1/data/blender_general_12_narrow.py deleted file mode 100644 index bb1183fb695101bac1f8f33da9438a84378b3dca..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_12_narrow.py +++ /dev/null @@ -1,427 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 12 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow_8, 'r') as f: - narrow_8_meta = json.load(f) - - pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json" - with open(pose_json_path_narrow_4, 'r') as f: - narrow_4_meta = json.load(f) - - - self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_8_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, 
filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 11] - if idx < 8: - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - else: - # target view - c2w = self.c2ws[idx-8+40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4 + 4 + 4*4) - src_views_used = [] - skipped_idx = [40, 41, 42, 43] - for vid in src_views: - if vid in skipped_idx: - continue - - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - - # choose narrow - if cur_view_id < 8: - img_filename = 
os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - cur_view_id = cur_view_id - 1 - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py b/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py deleted file mode 100644 index 467dc5d4d1df3b6d3c8aa4384a1048bec9910973..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py +++ /dev/null @@ -1,427 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 8 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
- - - pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow_8, 'r') as f: - narrow_8_meta = json.load(f) - - pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json" - with open(pose_json_path_narrow_4, 'r') as f: - narrow_4_meta = json.load(f) - - - self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_8_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 11] - if idx < 8: - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - else: - # target view - c2w = self.c2ws[idx-8+40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4 + 4 + 4*4) - src_views_used = [] - skipped_idx = [40, 41, 42, 43] - for vid in src_views: - if vid in skipped_idx: - continue - - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - - # choose narrow - if cur_view_id < 8: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - cur_view_id = cur_view_id - 1 - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - 
w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
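As in the other loaders, the per-view near/far bounds are recomputed after rescaling from the camera's distance to the origin (dist - 1 and dist + 1, padded by 0.95/1.05), which assumes the normalized object fits inside the unit sphere. A minimal sketch with a hypothetical pose:

```python
import numpy as np

# Near/far bounds used after normalization: the object sits inside the
# unit sphere, so visible depths lie within dist-to-origin +/- 1.
def near_far_from_pose(c2w):
    dist = np.linalg.norm(c2w[:3, 3])        # camera center to scene origin
    near, far = dist - 1.0, dist + 1.0
    return 0.95 * near, 1.05 * far

c2w = np.eye(4)
c2w[:3, 3] = [0.0, 0.0, 1.6]                 # hypothetical normalized pose
print(near_far_from_pose(c2w))               # approximately (0.57, 2.73)
```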
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360.py b/SparseNeuS_demo_v1/data/blender_general_360.py deleted file mode 100644 index 37e8664613a614c03227375d8a0b25224d694bdc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360.py +++ /dev/null @@ -1,412 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_wide_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 36*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//36] - - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % 36 # [0, 35] - gt_view_idx = idx // 12 # [0, 2] - target_view_idx = idx % 12 # [0, 11] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt.png') - - depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - idx_of_12 = idx - 12 * gt_view_idx # idx % 12 - - src_views = list(i % 12 + 12 * gt_view_idx for i in range(idx_of_12 - 1-1, idx_of_12 + 2+1)) - - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py b/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py deleted file mode 100644 index 72ad72bbfb336fa3e0d8b69f74c94afbea1593b7..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py +++ /dev/null @@ -1,406 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 6*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 24 # [0, 23] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt.png') - - depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(6+idx*4, 6+(idx+1)*4) - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py b/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py deleted file mode 100644 index 380706615bfe4a183b302f127af9913bfc2f4790..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py +++ /dev/null @@ -1,411 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 6*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 24 # [0, 23] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt.png') - - depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(6+idx*4, 6+(idx+1)*4) - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ 
w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("imgs: ", len(imgs)) - # print("img1 shape:", imgs[0].shape) - # print("img2 shape:", imgs[1].shape) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py deleted file mode 100644 index beb1f976907680936b20b37d76133589804d40c5..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py +++ /dev/null @@ -1,480 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 2 == 0: - valid_list = [0, 2, 4, 6] - else: - valid_list = [1, 3, 5, 7] - - if idx % 16 < 8: - idx = idx % 16 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ 
w2c_ref_inv) - - else: - idx = idx % 16 - 8 # [0, 7] - - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(40+8, 40+8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 8) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = 
torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py deleted file mode 100644 index e80567fe34ee51cb49355ee26ea8ce80dff706e6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py +++ /dev/null @@ -1,476 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (6 + 6*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - 
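# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The __init__ above converts every loaded pose from Blender's camera
# convention to OpenCV's by right-multiplying with a fixed axis-flip matrix
# (camera y and z negated), then obtains the world-to-camera matrix by
# inversion. A minimal stand-alone version of that step, fed a hypothetical
# identity pose, assuming the same convention as the loader:
import numpy as np

blender2opencv = np.array([[1,  0,  0, 0],
                           [0, -1,  0, 0],
                           [0,  0, -1, 0],
                           [0,  0,  0, 1]], dtype=np.float64)

def blender_pose_to_opencv(pose_blender):
    """pose_blender: 4x4 cam2world in Blender convention -> (c2w, w2c) in OpenCV."""
    c2w = pose_blender @ blender2opencv   # flip the y/z camera axes
    w2c = np.linalg.inv(c2w)              # world-to-camera extrinsic
    return c2w, w2c

c2w_example, w2c_example = blender_pose_to_opencv(np.eye(4))
# -----------------------------------------------------------------------------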
mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 12*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//12] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 12 < 8: - idx = idx % 12 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - if (vid // 4) % 2 != idx % 2: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - else: - idx = idx % 12 - 8 # [0, 5] - valid_list = [0, 2, 3, 5] - idx = valid_list[idx] # [0, 3] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - 
w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_0.png') - - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(40+6, 40+6+24) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 6) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_{(vid-46) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % 12] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] 
= torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py b/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py deleted file mode 100644 index 248e9f9591b95a711406b0e1efb3568e05e2414a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py +++ /dev/null @@ -1,449 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - if self.split == 'train': - self.imgs_per_instance = 12 - else: - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, 
filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if self.split == 'train': - if idx == 4: - idx = 5 - elif idx == 5: - idx = 7 - elif idx == 10: - idx = 13 - elif idx == 11: - idx = 15 - - if idx % 16 < 8: # narrow image as target - idx = idx % 16 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % 16 - 8 # [0, 5] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - 
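# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The target-view branch above turns the stored planar z-depth (a 16-bit PNG
# in millimetres) into a per-pixel *radial* distance from the camera centre:
# each pixel's unnormalised ray direction (x/fx, y/fy, 1) is scaled by its
# z-depth to give a camera-space surface point, and the point's norm is the
# distance. The loader builds the pixel grid with kornia's create_meshgrid;
# below is a numpy-only equivalent with hypothetical H, W, fx, fy values:
import numpy as np

def zdepth_to_distance(depth_z, fx, fy, cx=None, cy=None):
    """depth_z: (H, W) planar z-depth in metres -> (H, W) radial distance."""
    H, W = depth_z.shape
    cx = W / 2 if cx is None else cx
    cy = H / 2 if cy is None else cy
    # pixel-centre coordinates, matching the +0.5 offset used by the loader
    j, i = np.meshgrid(np.arange(H) + 0.5, np.arange(W) + 0.5, indexing='ij')
    dirs = np.stack([(i - cx) / fx, (j - cy) / fy, np.ones_like(i)], axis=-1)  # (H, W, 3), z = 1
    points = dirs * depth_z[..., None]        # camera-space surface points
    return np.linalg.norm(points, axis=-1)    # distance from the camera centre

dist_example = zdepth_to_distance(np.full((256, 256), 1.2), fx=355.6, fy=355.6)
# -----------------------------------------------------------------------------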
image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if origin_idx < 8 else False) - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py deleted file mode 100644 index e1fd371e5fc7be9685b81efa3d607018b2a9bdb1..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py +++ /dev/null @@ -1,396 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
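# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# load_K_Rt_from_P() above recovers intrinsics and a cam2world pose from a
# 3x4 projection matrix via cv2.decomposeProjectionMatrix: K comes back up to
# scale (hence the division by K[2, 2]), R is the world-to-camera rotation
# (hence R.T in the pose), and the translation is a homogeneous camera centre.
# A small round-trip check of that idea, using a hypothetical camera:
import cv2
import numpy as np

K = np.array([[350., 0., 128.],
              [0., 350., 128.],
              [0., 0., 1.]])
R = cv2.Rodrigues(np.array([0.1, -0.2, 0.3]).reshape(3, 1))[0]  # world-to-camera rotation
C = np.array([0.5, -0.3, 2.0])                                  # camera centre in world space
t = -R @ C
P = K @ np.hstack([R, t[:, None]])                              # 3x4 projection matrix

out = cv2.decomposeProjectionMatrix(P)
K_rec = out[0] / out[0][2, 2]
R_rec = out[1]
C_rec = (out[2][:3] / out[2][3])[:, 0]                          # de-homogenise the centre

c2w = np.eye(4)
c2w[:3, :3] = R_rec.T                                           # cam2world rotation
c2w[:3, 3] = C_rec                                              # cam2world translation
print(np.abs(K_rec - K).max(), np.abs(C_rec - C).max())         # both should be ~0
# -----------------------------------------------------------------------------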
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(8, 8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - 
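# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The loop above re-expresses every camera in a normalised object frame:
# scale_mat maps the unit sphere onto the scene's bounding sphere, so folding
# it into P = K @ w2c @ scale_mat and decomposing P again yields poses in a
# frame where the object lies inside ||x|| <= 1. Near/far then follow from the
# camera-to-origin distance with small safety margins. A sketch of that last
# step, with a hypothetical normalised cam2world pose:
import numpy as np

def near_far_from_pose(c2w_normalised, margin=0.05):
    """c2w in the unit-sphere frame -> conservative (near, far) bounds."""
    dist = np.linalg.norm(c2w_normalised[:3, 3])  # camera distance to the origin
    near, far = dist - 1.0, dist + 1.0            # object radius is 1 after scaling
    return (1.0 - margin) * near, (1.0 + margin) * far

c2w_demo = np.eye(4)
c2w_demo[:3, 3] = [0.0, 0.0, 2.2]                 # hypothetical camera 2.2 units away
print(near_far_from_pose(c2w_demo))               # approximately (1.14, 3.36)
# -----------------------------------------------------------------------------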
new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py b/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py deleted file mode 100644 index b1072d6a3e02f1908add474963aa6c6acaf69055..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py +++ /dev/null @@ -1,396 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(8, 8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10_gt.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, 
factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py deleted file mode 100644 index fa97eb6ca99c254548e501f2e05d883f2b015e1c..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py +++ /dev/null @@ -1,446 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 16 < 8: # narrow image as target - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 5] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if 
np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if origin_idx < 8 else False) - - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0,1,2] - local_idxs = [8+i*4+local_idx for local_idx in local_idxs] - src_views += local_idxs - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = 
torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py deleted file mode 100644 index 740bb81125a297fc1d504f4c119c7f9a76630507..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py +++ /dev/null @@ -1,439 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
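# A minimal, hedged sketch of the pose handling a few lines below: each c2w pose
# loaded from the *_pose.json files is in Blender camera convention and is
# right-multiplied by blender2opencv to flip the camera-frame y/z axes into OpenCV
# convention; the w2c extrinsic is then just its inverse. `example_pose` is a
# hypothetical 4x4 pose for illustration, not data from the original JSON files.
import numpy as np

blender2opencv = np.array([[1, 0, 0, 0],
                           [0, -1, 0, 0],
                           [0, 0, -1, 0],
                           [0, 0, 0, 1]], dtype=np.float64)

def blender_pose_to_opencv(pose_c2w_blender):
    """Return (c2w, w2c) in OpenCV convention from a Blender-convention c2w pose."""
    c2w = pose_c2w_blender @ blender2opencv   # flip the camera-frame y and z axes
    w2c = np.linalg.inv(c2w)                  # world-to-camera extrinsic
    return c2w, w2c

example_pose = np.eye(4)        # identity rotation
example_pose[2, 3] = 1.5        # camera placed 1.5 units along +z (illustration only)
c2w, w2c = blender_pose_to_opencv(example_pose)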
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 16 < 8: # gt image as target - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 7] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if (origin_idx % 16) < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if (origin_idx % 16) < 8 else False) - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = 
os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{cur_view_id}_{(vid) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py deleted file mode 100644 index 6d860e521935b529c4240a0299d892ff90f683b2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py +++ /dev/null @@ -1,470 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
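# A minimal, hedged sketch of the RGBA-to-RGB step used in __getitem__ below:
# after T.ToTensor(), 4-channel view PNGs are alpha-composited onto a white
# background ( img[:3] * img[-1:] + (1 - img[-1:]) ). `rgba` here is a
# hypothetical (4, H, W) tensor in [0, 1] standing in for a loaded image.
import torch

def composite_on_white(rgba):
    """Blend a (4, H, W) RGBA tensor over white, returning a (3, H, W) RGB tensor."""
    rgb, alpha = rgba[:3], rgba[-1:]
    return rgb * alpha + (1.0 - alpha)   # pixels with alpha == 0 become white

rgba = torch.rand(4, 256, 256)           # stand-in for a transformed PNG
rgb = composite_on_white(rgba)           # shape: (3, 256, 256)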
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % self.imgs_per_instance < 8: - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 5] - - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # 
print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(40+8, 40+8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 8) // 4 - - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py deleted file mode 100644 index 9609f20a733486544347d7fec78ae16bf1b9e2a3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
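# A minimal, hedged sketch of the target-view depth handling in __getitem__ below:
# the view_*_depth_mm.png appears to store z-depth in millimetres, which the loader
# divides by 1000 and converts to per-pixel Euclidean distance along the camera ray
# via get_ray_directions() and a norm. `depth_mm`, `fx` and `fy` are hypothetical
# stand-ins for the real depth file and intrinsics.
import numpy as np
import torch
from kornia import create_meshgrid

def zdepth_mm_to_ray_distance(depth_mm, fx, fy):
    H, W = depth_mm.shape
    depth_m = depth_mm.astype(np.float32) / 1000.0                       # mm -> m
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    i, j = grid.unbind(-1)
    dirs = torch.stack([(i - W / 2) / fx, (j - H / 2) / fy, torch.ones_like(i)], -1)
    surface = dirs.numpy() * depth_m[..., None]                          # (H, W, 3)
    return np.linalg.norm(surface, axis=-1)                              # (H, W)

dist = zdepth_mm_to_ray_distance(np.full((256, 256), 1200, np.uint16), 280.0, 280.0)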
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(0, 8) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{vid}_0.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - - imgs = 
torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py b/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py deleted file mode 100644 index bacd68d0d8cc7b578bf546e4484590f985920051..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py +++ /dev/null @@ -1,418 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - # return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - # idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - # src_views = range(8, 8 + 8 * 4) - src_views = range(8+idx*4, 8+(idx+1)*4) - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
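Every loader in this diff treats the target view as the reference frame: each world2cam matrix is right-multiplied by the inverse of the reference w2c, so the reference camera lands on the identity, and Blender/OpenGL poses are first converted to the OpenCV convention by flipping the y and z axes. A small numpy sketch of those two conventions (the toy poses are invented for illustration):

import numpy as np

# Blender-style camera -> OpenCV camera: flip y and z, as in the loaders above.
blender2opencv = np.diag([1.0, -1.0, -1.0, 1.0])

def make_relative(w2cs, ref_idx=0):
    """Re-express every world2cam matrix in the reference camera's frame."""
    w2c_ref_inv = np.linalg.inv(w2cs[ref_idx])
    return [w2c @ w2c_ref_inv for w2c in w2cs]

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    c2ws = []
    for _ in range(3):
        c2w = np.eye(4)
        c2w[:3, 3] = rng.normal(size=3)     # toy translations, identity rotations
        c2ws.append(c2w @ blender2opencv)
    w2cs = [np.linalg.inv(c2w) for c2w in c2ws]
    rel = make_relative(w2cs, ref_idx=0)
    print(np.allclose(rel[0], np.eye(4)))   # True: reference camera becomes the origin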
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
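The loop above folds the scene-normalising scale_mat into every camera by forming P = intrinsic @ w2c @ scale_mat, re-decomposing P to get the pose in unit-sphere coordinates, and then deriving the per-view affine matrix and a near/far bracket from the camera's distance to the origin (the object now fits a unit sphere, so near/far is roughly dist - 1 and dist + 1 with 5% margins). A condensed sketch of that per-camera step; the function name is illustrative, not part of the repository:

import numpy as np
import cv2

def normalize_camera(intrinsic, w2c, scale_mat):
    """Fold a 4x4 scale_mat into one camera (4x4 intrinsic/extrinsic) and rebuild its pose."""
    P = (intrinsic @ w2c @ scale_mat)[:3, :4]
    K, R, t = cv2.decomposeProjectionMatrix(P)[:3]
    K = K / K[2, 2]
    c2w = np.eye(4, dtype=np.float32)
    c2w[:3, :3] = R.transpose()
    c2w[:3, 3] = (t[:3] / t[3])[:, 0]
    w2c_new = np.linalg.inv(c2w)

    affine_mat = np.eye(4)
    affine_mat[:3, :4] = K @ w2c_new[:3, :4]   # projection used for feature warping

    dist = np.linalg.norm(c2w[:3, 3])          # camera distance in unit-sphere space
    near_far = [0.95 * (dist - 1.0), 1.05 * (dist + 1.0)]
    return w2c_new, c2w, affine_mat, near_far

The companion scale_factor (1 / radius) is what the loaders multiply the depth maps by, so metric depth stays consistent with the rescaled poses.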
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_6.py b/SparseNeuS_demo_v1/data/blender_general_narrow_6.py deleted file mode 100644 index 5d8333986bb15b3e3fd495f1ee4600e22ef93246..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_6.py +++ /dev/null @@ -1,399 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - if self.split == 'train': - return 6*len(self.lvis_paths) - else: - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - if self.split == 'train': - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 # [0, 5] - if idx == 4: - idx = 5 - elif idx == 5: - idx = 7 - else: - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - # print("len(imges)", len(imgs)) - - # ! 
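The depth PNGs store z-depth in millimetres; the block above turns them into Euclidean distance along each ray by scaling the unit-z camera-space directions by the z-depth and taking the norm, which is the quantity the ray sampler expects. A standalone version of that conversion (the focal lengths and file path are placeholders):

import cv2
import numpy as np
import torch
from kornia import create_meshgrid

def zdepth_to_distance(depth_mm_png, fx, fy, cx=None, cy=None):
    """Convert a millimetre z-depth map to per-pixel distance from the camera centre."""
    depth_z = cv2.imread(depth_mm_png, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0
    H, W = depth_z.shape
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5   # (H, W, 2)
    i, j = grid.unbind(-1)
    cx = W / 2 if cx is None else cx
    cy = H / 2 if cy is None else cy
    # camera-space ray directions with unit z, mirroring get_ray_directions()
    dirs = torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], -1)
    points = dirs.numpy() * depth_z[..., None]      # 3D surface point per pixel
    distance = np.linalg.norm(points, axis=-1)      # length of the ray to that point
    return distance, depth_z > 0                    # distance map and validity mask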
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py deleted file mode 100644 index 58c26348e73b44fdcb33bad81b1fddba66efeffc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py +++ /dev/null @@ -1,393 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - # local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
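In this 8_3_fixed variant each of the 8 anchor views contributes 3 of its 4 predicted neighbours, chosen with the fixed offsets [0, 2, 3]; the flat index vid is later mapped back to (anchor, prediction) when building the view_{a}_{b}_10.png filename. A small sketch of that indexing:

def src_view_ids(local_idxs=(0, 2, 3), n_anchors=8, preds_per_anchor=4):
    # ids 0..7 are the anchor views themselves; predicted views start at 8, anchor-major
    ids = []
    for anchor in range(n_anchors):
        ids += [8 + anchor * preds_per_anchor + k for k in local_idxs]
    return ids

def src_view_filename(vid, preds_per_anchor=4):
    anchor = (vid - 8) // preds_per_anchor
    pred = vid % preds_per_anchor
    return f"view_{anchor}_{pred}_10.png"

if __name__ == "__main__":
    ids = src_view_ids()
    print(len(ids))                   # 24 = 8 anchors x 3 predictions
    print(src_view_filename(ids[0]))  # view_0_0_10.png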
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py deleted file mode 100644 index b52542595e8d39dff91f18e63a0b504c4c4d2d48..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = list() - for i in range(8): - - if self.split == 'train': - local_idxs = np.random.choice(4, 3, replace=False) - else: - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
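The _random variant differs from the fixed one only in how those 3-of-4 predictions are picked: randomly without replacement at train time, and with the deterministic [0, 2, 3] everywhere else. A compact sketch of that choice:

import numpy as np

def pick_local_idxs(split):
    """3-of-4 prediction offsets per anchor view: random for training, fixed otherwise."""
    if split == "train":
        return np.random.choice(4, 3, replace=False).tolist()
    return [0, 2, 3]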
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py deleted file mode 100644 index e120367ce96847e9fb60b2ae038a812583fe75e3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py +++ /dev/null @@ -1,432 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
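Each PNG is read as RGBA and alpha-composited onto a white background before being stacked, via rgb * alpha + (1 - alpha). A minimal sketch of that blend for one image (the path is a placeholder):

import torch
from PIL import Image
from torchvision import transforms as T

to_tensor = T.Compose([T.ToTensor()])

def load_rgb_on_white(path):
    """Load a (possibly RGBA) image and composite it over a white background."""
    img = to_tensor(Image.open(path))      # (3 or 4, H, W), values in [0, 1]
    if img.shape[0] == 4:
        rgb, alpha = img[:3], img[-1:]
        img = rgb * alpha + (1.0 - alpha)  # transparent pixels become white
    return img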
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - if self.split == 'train': - # randomly select one view from eight views as reference view - idx_to_select = np.random.randint(0, 8) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}.png') - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs[0] = img - - w2c_selected = self.all_extrinsics[idx_to_select] @ w2c_ref_inv - P = self.all_intrinsics[idx_to_select] @ w2c_selected @ scale_mat - P = P[:3, :4] - - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = self.all_intrinsics[idx_to_select][:3, :3] @ w2c[:3, :4] - new_affine_mats[0] = affine_mat - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - new_near_fars[0] = [0.95 * near, 1.05 * far] - - new_w2cs[0] = w2c - new_c2ws[0] = c2w - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}_depth_mm.png')) - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance * scale_factor - - new_depths_h[0] = depth_h - masks_h[0] = mask_h - - - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all.py deleted file mode 100644 index 50b85d133707e83b36d926b7acf1cb121dd4d04d..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all.py +++ /dev/null @@ -1,386 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py deleted file mode 100644 index 1b832beccd85c8a0be98edf95f0d244c1cbf8b17..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py +++ /dev/null @@ -1,410 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py deleted file mode 100644 index 5c2dbebd00ed9e0293c26029c97ab77b7880fcf0..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py +++ /dev/null @@ -1,411 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 10 - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx * 8 - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py index 194cf007f54d2d377ce6561050f82e38dc246e73..530a434828d4fdb1c4d2439ea9fbdcc40d449ef6 100644 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py +++ b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py @@ -1,6 +1,6 @@ from torch.utils.data import Dataset -from utils.misc_utils import read_pfm import os +import json import numpy as np import cv2 from PIL import Image @@ -9,12 +9,7 @@ from torchvision import transforms as T from data.scene import get_boundingbox from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio from kornia import create_meshgrid -import open3d as o3d - def get_ray_directions(H, W, focal, center=None): """ @@ -73,10 +68,6 @@ class BlenderPerView(Dataset): # print("root_dir: ", root_dir) self.root_dir = root_dir self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' self.specific_dataset_name = specific_dataset_name self.n_views = n_views @@ -102,8 +93,6 @@ class BlenderPerView(Dataset): for shape_name in self.shape_list: self.lvis_paths.append(os.path.join(main_folder, shape_name)) - # print("lvis_paths: ", 
self.lvis_paths) - if img_wh is not None: assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ 'img_wh must both be multiples of 32!' @@ -130,9 +119,6 @@ class BlenderPerView(Dataset): self.all_extrinsics.append(extrinsic) self.all_near_fars.append(near_far) - def read_depth(self, filename): - pass - def read_mask(self, filename): mask_h = cv2.imread(filename, 0) mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, @@ -160,11 +146,6 @@ class BlenderPerView(Dataset): # return 8*len(self.lvis_paths) return len(self.lvis_paths) - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - def __getitem__(self, idx): sample = {} idx = idx * 8 # to be deleted @@ -198,9 +179,8 @@ class BlenderPerView(Dataset): self.c2ws = [] self.w2cs = [] self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] + for image_idx, img_id in enumerate(self.img_ids): + pose = self.input_poses[image_idx] c2w = pose @ self.blender2opencv self.c2ws.append(c2w) self.w2cs.append(np.linalg.inv(c2w)) @@ -224,7 +204,6 @@ class BlenderPerView(Dataset): w2cs.append(w2c @ w2c_ref_inv) c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') img = Image.open(img_filename) @@ -258,7 +237,6 @@ class BlenderPerView(Dataset): for vid in src_views: - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') img = Image.open(img_filename) img_wh = self.img_wh @@ -312,7 +290,6 @@ class BlenderPerView(Dataset): new_near_fars.append([0.95 * near, 1.05 * far]) new_depths_h.append(depth * scale_factor) - # print(new_near_fars) imgs = torch.stack(imgs).float() depths_h = np.stack(new_depths_h) masks_h = np.stack(masks_h) @@ -360,7 +337,6 @@ class BlenderPerView(Dataset): sample['view_ids'] = torch.from_numpy(np.array(view_ids)) sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - # sample['light_idx'] = torch.tensor(light_idx) sample['scan'] = shape_name sample['scale_factor'] = torch.tensor(scale_factor) diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py deleted file mode 100644 index 7ce059be019a360b193c526c358057ffc9b48d1a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py +++ /dev/null @@ -1,414 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - self.specific_dataset_name = 'Objaverse' - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
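# ----------------------------------------------------------------------------
# Editor's note (illustrative aside, not part of the original file or diff):
# every loader deleted in this patch re-derives camera poses by composing
# P = intrinsic @ extrinsic @ scale_mat and passing P to load_K_Rt_from_P(),
# whose "? why need transpose here" comment marks a recurring point of
# confusion. Below is a minimal, self-contained sketch of that decomposition;
# the intrinsics and pose values are hypothetical, chosen only for the demo.
import cv2
import numpy as np

K = np.array([[280.0,   0.0, 128.0],
              [  0.0, 280.0, 128.0],
              [  0.0,   0.0,   1.0]])            # hypothetical pinhole intrinsics
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 1.5]                     # hypothetical world-to-camera pose
P = K @ w2c[:3, :4]                              # 3x4 projection matrix

# cv2.decomposeProjectionMatrix returns the world-to-camera rotation R and the
# camera centre in homogeneous coordinates, so the cam2world pose is rebuilt
# from R.T (hence the transpose in load_K_Rt_from_P) and t[:3] / t[3].
K_out, R, t = cv2.decomposeProjectionMatrix(P)[:3]
K_out = K_out / K_out[2, 2]                      # normalise so K_out[2, 2] == 1
c2w = np.eye(4, dtype=np.float32)
c2w[:3, :3] = R.transpose()                      # world2cam rotation -> cam2world
c2w[:3, 3] = (t[:3] / t[3])[:, 0]                # camera centre in world coords
# ----------------------------------------------------------------------------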
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - # print(self.img_ids) - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if vid % 4 == 0: - vid = (vid - 8) // 4 - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}') - else: - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py deleted file mode 100644 index f69ece26bdd88955bf5612f2f6f66ae7f9262e19..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py +++ /dev/null @@ -1,465 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - -def calc_pose(phis, thetas, size, radius = 1.2): - import torch - def normalize(vectors): - return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) - # device = torch.device('cuda') - thetas = torch.FloatTensor(thetas) - phis = torch.FloatTensor(phis) - - centers = torch.stack([ - radius * torch.sin(thetas) * torch.sin(phis), - -radius * torch.cos(thetas) * torch.sin(phis), - radius * torch.cos(phis), - ], dim=-1) # [B, 3] - - # lookat - forward_vector = normalize(centers).squeeze(0) - up_vector = torch.FloatTensor([0, 0, 
1]).unsqueeze(0).repeat(size, 1) - right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1)) - if right_vector.pow(2).sum() < 0.01: - right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1) - up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1)) - - poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1) - poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1) - poses[:, :3, 3] = centers - return poses - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - - with open(pose_json_path, 'r') as f: - meta = json.load(f) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid in range(self.input_poses.shape[0]): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - # pose_json_path = os.path.join(folder_path, "pose.json") - # with open(pose_json_path, 'r') as f: - # meta = json.load(f) - - # self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - # self.img_wh = (256, 256) - # self.input_poses = np.array(list(meta["c2ws"].values())) - # intrinsic = np.eye(4) - # intrinsic[:3, :3] = np.array(meta["intrinsics"]) - # self.intrinsic = intrinsic - # self.near_far = np.array(meta["near_far"]) - # self.near_far[1] = 1.8 - # self.define_transforms() - # self.blender2opencv = np.array( - # [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - # ) - - pose_file = os.path.join(folder_path, '32_random', 'views.npz') - pose_array = np.load(pose_file) - pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws - - self.img_wh = (256, 256) - self.input_poses = np.array(pose) - self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1) - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix in range(pose.shape[0]): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, '32_random', f'{idx}.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(0, 8 * 4) - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, '32_random', f'{vid}.png') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if 
img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py deleted file mode 100644 index 6263a9ff47edc8f7b65600786c244fafb809240b..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py +++ /dev/null @@ -1,419 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
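# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# The loaders in this diff all convert a scaled projection matrix back into intrinsics and a
# cam2world pose via load_K_Rt_from_P (defined above). The round trip below, with made-up
# numbers, shows why transposing R and de-homogenizing t in that helper yields a
# camera-to-world matrix rather than world-to-camera.
import cv2
import numpy as np

K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
R = cv2.Rodrigues(np.array([0.1, -0.2, 0.05]))[0]          # small world-to-camera rotation
t = np.array([[0.3], [-0.1], [1.5]])
P = K @ np.hstack([R, t])                                   # (3, 4) projection matrix

K_out, R_out, t_out = cv2.decomposeProjectionMatrix(P)[:3]
K_out = K_out / K_out[2, 2]                                 # normalize so K[2, 2] == 1

pose = np.eye(4, dtype=np.float32)
pose[:3, :3] = R_out.transpose()                            # R_out is world-to-camera, so transpose
pose[:3, 3] = (t_out[:3] / t_out[3])[:, 0]                  # homogeneous camera centre

assert np.allclose(K_out, K, atol=1e-4)
assert np.allclose(np.linalg.inv(pose)[:3, :3], R, atol=1e-4)   # inverting the pose recovers R
# --- end editor's note ---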
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if (vid // 4) % 2 != 0: - continue - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py deleted file mode 100644 index c88c0d9b37402f970d9b2d7686b774943366e9a8..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py +++ /dev/null @@ -1,420 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
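# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# get_ray_directions (defined above) maps each pixel to a ray direction in the camera frame:
# x = (i - cx) / fx, y = (j - cy) / fy, z = 1. The tiny example below uses made-up focal
# lengths and a plain torch.meshgrid instead of kornia's create_meshgrid; it shows that the
# pixel nearest the principal point looks (almost) straight down the +z camera axis.
import torch

H, W = 4, 4
focal = (280.0, 280.0)                      # (fx, fy), made-up values
cent = (W / 2, H / 2)

j, i = torch.meshgrid(torch.arange(H, dtype=torch.float32),
                      torch.arange(W, dtype=torch.float32), indexing="ij")
i, j = i + 0.5, j + 0.5                     # the +0.5 matches the grid offset used above

directions = torch.stack([(i - cent[0]) / focal[0],
                          (j - cent[1]) / focal[1],
                          torch.ones_like(i)], dim=-1)      # (H, W, 3)

print(directions[H // 2, W // 2])           # ~[0.0018, 0.0018, 1.0]
# --- end editor's note ---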
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py deleted file mode 100644 index 512c3db02edc8e68208167b7d1715f1f67025cdf..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py +++ /dev/null @@ -1,428 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
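# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# cal_scale_mat (defined above) builds scale_mat = diag(r, r, r, 1) with the bounding-sphere
# centre in its last column, i.e. x_world = r * x_normalized + centre. Applying its inverse to
# a camera centre (roughly what composing P with scale_mat and re-decomposing achieves in
# __getitem__) places the camera in a frame where the object fits the unit sphere; the per-view
# near/far of dist -/+ 1 then brackets that sphere. All numbers below are made up.
import numpy as np

centre = np.array([0.1, -0.2, 0.05])        # hypothetical bounding-sphere centre
radius = 0.8 * 1.1                          # hypothetical radius, inflated by factor=1.1

scale_mat = np.diag([radius, radius, radius, 1.0]).astype(np.float32)
scale_mat[:3, 3] = centre

cam_world = np.array([0.1, -0.2, 1.55, 1.0])            # hypothetical camera centre (homogeneous)
cam_norm = np.linalg.inv(scale_mat) @ cam_world          # x_normalized = (x_world - centre) / r
dist = np.linalg.norm(cam_norm[:3])

near, far = 0.95 * (dist - 1), 1.05 * (dist + 1)         # same slab as the deleted loaders use
print(dist, near, far)                                    # ~1.70, ~0.67, ~2.84
# --- end editor's note ---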
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - # src_views = range(8, 8 + 8 * 4) - - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - # local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py deleted file mode 100644 index 3c1a23183a388175c2212bf552fb15ae385737ab..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py +++ /dev/null @@ -1,420 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
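# --- Editor's illustrative sketch (not part of the deleted file above) --------
# load_K_Rt_from_P() recovers intrinsics and a cam2world pose from a 3x4
# projection matrix via cv2.decomposeProjectionMatrix; __getitem__ later feeds it
# P = intrinsic @ extrinsic @ scale_mat. The self-contained round trip below
# composes K @ w2c for a synthetic camera and checks that the decomposition
# returns the original camera. The look-at helper and the focal/principal-point
# numbers are hypothetical, chosen only to make the sketch runnable.
import cv2
import numpy as np

def _look_at_origin(eye):
    """Cam2world matrix whose +z axis points from `eye` toward the origin (assumed convention)."""
    eye = np.asarray(eye, dtype=np.float32)
    forward = -eye / np.linalg.norm(eye)
    right = np.cross(np.array([0., 0., 1.], dtype=np.float32), forward)
    right /= np.linalg.norm(right)
    down = np.cross(forward, right)
    c2w = np.eye(4, dtype=np.float32)
    c2w[:3, 0], c2w[:3, 1], c2w[:3, 2], c2w[:3, 3] = right, down, forward, eye
    return c2w

K = np.eye(4, dtype=np.float32)
K[0, 0] = K[1, 1] = 280.0          # focal length in pixels (hypothetical)
K[0, 2] = K[1, 2] = 128.0          # principal point of a 256x256 image

c2w = _look_at_origin([0.8, -0.9, 0.6])
P = (K @ np.linalg.inv(c2w))[:3, :4]        # 3x4 projection matrix, as built in the loader

out = cv2.decomposeProjectionMatrix(P)
K_rec, R, t = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]
c2w_rec = np.eye(4, dtype=np.float32)
c2w_rec[:3, :3] = R.transpose()             # R is the world2cam rotation, so transpose it
c2w_rec[:3, 3] = (t[:3] / t[3])[:, 0]       # homogeneous camera centre -> c2w translation

assert np.allclose(K_rec[:3, :3], K[:3, :3], atol=1e-3)
assert np.allclose(c2w_rec, c2w, atol=1e-3)
# ------------------------------------------------------------------------------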
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8) - - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - # img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py deleted file mode 100644 index 4b2c7f6b2306cca93f476c2c233956e4cff0dcfb..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py +++ /dev/null @@ -1,417 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - self.specific_dataset_name = 'Objaverse_archived' - - # self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
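# --- Editor's illustrative sketch (not part of the deleted file above) --------
# get_ray_directions() defined above returns pinhole ray directions with unit z,
# and the depth-supervised loader variants multiply them by a z-depth map to get
# camera-space surface points and Euclidean distances. The dependency-free sketch
# below re-implements the pixel grid with torch.meshgrid instead of
# kornia.create_meshgrid (a simplification, not the original helper); the focal
# length, principal point, and depth value are hypothetical.
import torch

def ray_directions(H, W, fx, fy, cx, cy):
    # Pixel-centre grid; the z component is 1, so multiplying by z-depth is valid.
    j, i = torch.meshgrid(torch.arange(H, dtype=torch.float32) + 0.5,
                          torch.arange(W, dtype=torch.float32) + 0.5,
                          indexing='ij')
    return torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], dim=-1)  # (H, W, 3)

H = W = 256
fx = fy = 280.0            # hypothetical focal length in pixels
cx = cy = 128.0            # principal point
dirs = ray_directions(H, W, fx, fy, cx, cy)

depth_z = torch.full((H, W), 1.5)             # synthetic z-depth map in scene units
points_cam = dirs * depth_z[..., None]        # (H, W, 3) camera-space surface points
distance = points_cam.norm(dim=-1)            # Euclidean depth, as in the commented-out loader code

# Near the principal point the ray is almost (0, 0, 1), so distance ~= z-depth there.
assert torch.allclose(distance[128, 128], torch.tensor(1.5), atol=1e-4)
# ------------------------------------------------------------------------------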
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join('/objaverse-processed/zero12345_img/zero12345_narrow_pose.json') - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{idx}.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{(vid-8)//4}_{(vid-8)%4}.png') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py deleted file mode 100644 index 33a4ecf7de541049e3b89cc98f74106b59d418c7..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py +++ /dev/null @@ -1,388 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - # directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - # surface_points = directions * depth_h[..., None] # [H, W, 3] - # distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - # depth_h = distance - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py deleted file mode 100644 index f811326da45563ae870350f78ccdbe358411f3b6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py +++ /dev/null @@ -1,389 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 4*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx * 2 - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if (vid // 4) % 2 != 0: - continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py deleted file mode 100644 index 76b9fccad69f6929e086074b55807ef5a0a17eee..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
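# load_K_Rt_from_P() above splits a 3x4 projection matrix P = K [R | t] back into
# intrinsics and a camera-to-world pose via cv2.decomposeProjectionMatrix. A
# minimal round-trip sketch of its usage, with a made-up camera:
import numpy as np
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])                 # hypothetical intrinsics
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 1.5]                    # hypothetical world-to-camera transform
P = (K @ w2c[:3, :4]).astype(np.float32)
intrinsics, c2w = load_K_Rt_from_P(None, P)     # c2w is (approximately) np.linalg.inv(w2c)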
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - - vid_list = [] - for vid in src_views: - if (vid // 4) % 2 != idx % 2: - continue - vid_list.append(vid) - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # print("idx:", idx) - # print("len(imgs)", len(imgs)) - # print("vid_list", vid_list) - # ! 
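# The depth maps above are stored as z-depth in millimetres (hence the /1000);
# the code converts them to distance along the ray by multiplying with the
# un-normalised ray directions (whose z component is 1) and taking the norm.
# Minimal per-pixel sketch with a made-up focal length and pixel:
import numpy as np
fx = fy = 280.0                                  # hypothetical focal length in pixels
cx = cy = 128.0                                  # principal point of a 256x256 image
u, v = 200.0, 64.0                               # hypothetical pixel
direction = np.array([(u - cx) / fx, (v - cy) / fy, 1.0])
z_depth = 1.3                                    # hypothetical z-depth in metres
point_cam = direction * z_depth                  # camera-space surface point
ray_distance = np.linalg.norm(point_cam)         # >= z_depth; equal only at the principal point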
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
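# The affine_mats assembled above are 4x4 projection matrices K @ [R | t] padded
# with an identity row, so 3D sample points can be projected into each view with a
# single matrix multiply. Minimal sketch projecting one world point (all numbers
# made up):
import numpy as np
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
w2c = np.eye(4)
w2c[2, 3] = 1.5                                  # hypothetical camera 1.5 units from the origin
affine_mat = np.eye(4)
affine_mat[:3, :4] = K @ w2c[:3, :4]
X = np.array([0.1, -0.2, 0.0, 1.0])              # homogeneous world point
x = affine_mat @ X
u, v = x[0] / x[2], x[1] / x[2]                  # pixel coordinates after the perspective divide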
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_gt_32.py b/SparseNeuS_demo_v1/data/blender_gt_32.py deleted file mode 100644 index 9ec6f0075febfcd46061e61ae10cd68b05dfb5fc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_gt_32.py +++ /dev/null @@ -1,419 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -import os, json -import numpy as np -def calc_pose(phis, thetas, size, radius = 1.2): - import torch - def normalize(vectors): - return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) - # device = torch.device('cuda') - thetas = torch.FloatTensor(thetas) - phis = torch.FloatTensor(phis) - - centers = torch.stack([ - radius * torch.sin(thetas) * torch.sin(phis), - -radius * torch.cos(thetas) * torch.sin(phis), - radius * torch.cos(phis), - ], dim=-1) # [B, 3] - - # lookat - forward_vector = normalize(centers).squeeze(0) - up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1) - right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1)) - if right_vector.pow(2).sum() < 0.01: - right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1) - up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1)) - - poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1) - poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1) - poses[:, :3, 3] = centers - return poses - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines 
= open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - - with open(pose_json_path, 'r') as f: - meta = json.load(f) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid in range(self.input_poses.shape[0]): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, 
radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 32*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//32] - idx = idx % 32 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - pose_file = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, 'views.npz') - pose_array = np.load(pose_file) - pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws - - self.img_wh = (256, 256) - self.input_poses = np.array(pose) - self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1) - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix in range(pose.shape[0]): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}.png') - - depth_filename = os.path.join(os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(0, 8 * 4) - - for vid in src_views: - img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{vid}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], 
dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
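# calc_pose() defined near the top of this file places cameras on a sphere of
# radius 1.2 around the origin from (elevation, azimuth) pairs and returns their
# camera-to-world poses; the views.npz angles loaded above are passed to it as
# calc_pose(elevations, azimuths, 32). Minimal usage sketch of that function with
# made-up angles (radians, matching the torch.sin/torch.cos inside calc_pose):
import numpy as np
phis = [np.pi / 3, np.pi / 3, np.pi / 2, np.pi / 2]      # hypothetical elevations
thetas = [0.0, 0.5 * np.pi, np.pi, 1.5 * np.pi]          # hypothetical azimuths
poses = calc_pose(phis, thetas, size=4)                  # (4, 3, 4) cam-to-world matrices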
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt b/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt deleted file mode 100644 index bd0d79868f196991c06ec2a496dbe06e5ded0fd2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt +++ /dev/null @@ -1,93 +0,0 @@ -46 -0 -10 10 2346.410000 1 2036.530000 9 1243.890000 12 1052.870000 11 1000.840000 13 703.583000 2 604.456000 8 439.759000 14 327.419000 27 249.278000 -1 -10 9 2850.870000 10 2583.940000 2 2105.590000 0 2052.840000 8 1868.240000 13 1184.230000 14 1017.510000 12 961.966000 7 670.208000 15 657.218000 -2 -10 8 2501.240000 1 2106.880000 7 1856.500000 9 1782.340000 3 1141.770000 15 1061.760000 14 815.457000 16 762.153000 6 709.789000 10 699.921000 -3 -10 7 1294.390000 6 1159.130000 2 1134.270000 4 905.717000 8 687.320000 5 600.015000 17 496.958000 16 481.969000 1 379.011000 15 307.450000 -4 -10 5 1333.740000 6 1145.150000 3 895.254000 7 486.504000 18 446.420000 2 418.517000 17 326.528000 8 161.115000 16 149.154000 1 103.626000 -5 -10 6 1676.060000 18 1555.060000 4 1335.550000 17 868.416000 3 593.755000 7 467.816000 20 440.579000 19 428.255000 16 242.327000 21 210.253000 -6 -10 17 2332.350000 7 1848.240000 18 1812.740000 5 1696.070000 16 1273.000000 3 1157.990000 4 1155.410000 20 771.624000 21 744.945000 2 700.368000 -7 -10 16 2709.460000 8 2439.700000 15 2078.210000 6 1864.160000 2 1846.600000 17 1791.710000 3 1296.860000 22 957.793000 9 879.088000 21 782.277000 -8 -10 15 3124.010000 9 3099.920000 14 2756.290000 2 2501.220000 7 2449.320000 1 1875.940000 16 1726.040000 13 1325.760000 23 1177.090000 24 1108.820000 -9 -10 13 3355.620000 14 3226.070000 8 3098.800000 10 3097.070000 1 2861.420000 12 1873.630000 2 1785.980000 15 1753.320000 25 1365.450000 0 1261.590000 -10 -10 12 3750.700000 9 3085.870000 13 3028.390000 1 2590.550000 0 2369.790000 11 2266.670000 14 1524.160000 26 1448.150000 27 1293.600000 8 1041.840000 -11 -10 12 3543.760000 27 3056.050000 10 2248.070000 26 1524.280000 28 1273.330000 13 1265.900000 29 1129.550000 0 998.164000 9 591.176000 30 572.919000 -12 -10 27 3889.870000 10 3754.540000 13 3745.210000 11 3584.260000 26 3574.560000 25 1877.110000 9 1866.340000 29 1482.720000 30 1418.510000 14 1341.860000 -13 -10 12 3773.140000 26 3699.280000 25 3657.170000 14 3652.040000 9 3356.290000 10 3049.270000 24 2098.910000 27 1900.960000 31 1460.960000 30 1349.620000 -14 -10 13 3663.520000 24 3610.690000 9 3232.550000 25 3216.400000 15 3128.840000 8 2758.040000 23 2219.910000 26 1567.450000 10 1536.600000 32 1419.330000 -15 -10 23 3194.920000 14 3126.000000 8 3120.430000 16 2897.020000 24 2562.490000 7 2084.050000 22 2041.630000 9 1752.080000 33 1232.290000 13 1137.550000 -16 -10 15 2884.140000 
7 2713.880000 22 2708.570000 17 2448.500000 21 2173.300000 23 1908.030000 8 1718.790000 6 1281.960000 35 1047.380000 34 980.064000 -17 -10 21 2632.480000 16 2428.000000 6 2343.570000 18 2250.230000 20 2149.750000 7 1779.420000 22 1380.250000 36 957.046000 5 878.398000 15 789.068000 -18 -9 17 2219.150000 20 2173.020000 6 1802.390000 19 1575.770000 5 1564.810000 21 1160.130000 16 660.317000 7 589.484000 36 559.983000 -19 -7 20 1828.970000 18 1564.630000 17 685.249000 36 613.420000 21 572.770000 5 427.597000 6 368.651000 -20 -8 21 2569.790000 36 2258.330000 18 2186.710000 17 2130.670000 19 1865.060000 35 996.122000 16 799.808000 40 778.721000 -21 -9 36 2704.590000 35 2639.690000 17 2638.190000 20 2605.430000 22 2604.260000 16 2158.250000 34 1239.250000 18 1178.240000 40 1128.570000 -22 -10 23 3232.680000 34 3175.150000 35 2831.090000 16 2712.510000 21 2632.190000 15 2033.390000 33 1712.670000 17 1393.860000 36 1290.960000 24 1195.330000 -23 -10 24 3710.900000 33 3603.070000 22 3244.200000 15 3190.620000 34 3086.490000 14 2220.110000 32 2100.000000 16 1917.100000 35 1359.790000 25 1356.710000 -24 -10 25 3844.600000 32 3750.750000 23 3710.600000 14 3609.090000 33 3091.040000 15 2559.240000 31 2423.710000 13 2109.360000 26 1440.580000 34 1410.030000 -25 -10 26 3951.740000 31 3888.570000 24 3833.070000 13 3667.350000 14 3208.210000 32 2993.460000 30 2681.520000 12 1900.230000 45 1484.030000 27 1462.880000 -26 -10 30 4033.350000 27 3970.470000 25 3925.250000 13 3686.340000 12 3595.590000 29 2943.870000 31 2917.000000 14 1556.340000 11 1554.750000 46 1503.840000 -27 -10 29 4027.840000 26 3929.940000 12 3875.580000 11 3085.030000 28 2908.600000 30 2792.670000 13 1878.420000 25 1438.550000 47 1425.200000 10 1290.250000 -28 -10 29 3687.020000 48 3209.130000 27 2872.860000 47 2014.530000 30 1361.950000 11 1273.600000 26 1062.850000 12 840.841000 46 672.985000 31 271.952000 -29 -10 27 4029.430000 30 3909.550000 28 3739.930000 47 3695.230000 48 3135.870000 26 2910.970000 46 2229.550000 12 1479.160000 31 1430.260000 11 1144.560000 -30 -10 26 4029.860000 29 3953.720000 31 3811.120000 46 3630.460000 47 3105.960000 27 2824.430000 25 2657.890000 45 2347.750000 32 1459.110000 12 1429.620000 -31 -10 25 3882.210000 30 3841.880000 32 3808.500000 45 3649.820000 46 3000.670000 26 2939.940000 24 2409.930000 44 2381.300000 13 1467.590000 29 1459.560000 -32 -10 31 3826.500000 24 3744.140000 33 3613.240000 44 3552.040000 25 3004.600000 45 2884.590000 43 2393.340000 23 2095.270000 30 1478.600000 14 1420.780000 -33 -10 32 3618.110000 23 3598.100000 34 3530.530000 43 3462.370000 24 3091.530000 44 2608.080000 42 2426.000000 22 1717.940000 31 1407.650000 25 1324.780000 -34 -10 33 3523.370000 42 3356.550000 35 3210.340000 22 3178.850000 23 3079.030000 43 2396.450000 41 2386.860000 24 1408.020000 32 1301.340000 21 1256.450000 -35 -10 34 3187.880000 41 3106.440000 36 2866.040000 22 2817.740000 21 2654.870000 40 2416.980000 42 2137.810000 23 1346.860000 33 1150.330000 16 1044.660000 -36 -8 40 2910.700000 35 2832.660000 21 2689.960000 20 2280.460000 41 1787.970000 22 1268.490000 34 981.636000 17 954.229000 -40 -7 36 2918.140000 41 2852.620000 35 2392.960000 21 1124.300000 42 1056.480000 34 877.946000 20 788.701000 -41 -9 35 3111.050000 42 3049.710000 40 2885.360000 34 2371.020000 36 1813.690000 43 1164.710000 22 1126.900000 21 906.536000 33 903.238000 -42 -10 34 3356.980000 43 3183.000000 41 3070.540000 33 2421.770000 35 2155.080000 44 1278.410000 23 1183.520000 22 1147.070000 40 1077.080000 32 899.646000 -43 -10 33 3461.240000 
44 3380.740000 42 3188.700000 34 2400.600000 32 2399.090000 45 1359.370000 23 1314.080000 41 1176.120000 24 1159.620000 31 901.556000 -44 -10 32 3550.810000 45 3510.160000 43 3373.110000 33 2602.330000 31 2395.930000 24 1410.430000 46 1386.310000 42 1279.000000 25 1095.240000 34 968.440000 -45 -10 31 3650.090000 46 3555.090000 44 3491.150000 32 2868.390000 30 2373.590000 25 1485.370000 47 1405.280000 43 1349.540000 33 1104.770000 26 1046.810000 -46 -10 30 3635.640000 47 3562.170000 45 3524.170000 31 2976.820000 29 2264.040000 26 1508.870000 44 1367.410000 48 1352.100000 32 1211.240000 25 1102.170000 -47 -10 29 3705.310000 46 3519.760000 48 3450.480000 30 3074.770000 28 2054.630000 27 1434.570000 45 1377.340000 31 1268.230000 26 1223.830000 25 471.111000 -48 -10 47 3401.950000 28 3224.840000 29 3101.160000 46 1317.100000 30 1306.700000 27 1235.070000 26 537.731000 31 291.919000 45 276.869000 11 258.856000 diff --git a/SparseNeuS_demo_v1/data/dtu/lists/test.txt b/SparseNeuS_demo_v1/data/dtu/lists/test.txt deleted file mode 100644 index b1420254bbe0fe15e9ad9358cdbaedf34605a558..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/lists/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -scan24 -scan37 -scan40 -scan55 -scan63 -scan65 -scan69 -scan83 -scan97 -scan105 -scan106 -scan110 -scan114 -scan118 -scan122 \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/dtu/lists/train.txt b/SparseNeuS_demo_v1/data/dtu/lists/train.txt deleted file mode 100644 index 4259e846edcee621baf19875e2900e169849f5e3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/lists/train.txt +++ /dev/null @@ -1,75 +0,0 @@ -scan1 -scan4 -scan5 -scan6 -scan8 -scan9 -scan10 -scan11 -scan12 -scan13 -scan14 -scan15 -scan16 -scan17 -scan18 -scan19 -scan20 -scan21 -scan22 -scan23 -scan28 -scan29 -scan30 -scan31 -scan32 -scan33 -scan34 -scan35 -scan36 -scan38 -scan39 -scan41 -scan42 -scan43 -scan44 -scan45 -scan46 -scan47 -scan48 -scan49 -scan50 -scan51 -scan52 -scan59 -scan60 -scan61 -scan62 -scan64 -scan74 -scan75 -scan76 -scan77 -scan84 -scan85 -scan86 -scan87 -scan88 -scan89 -scan90 -scan91 -scan92 -scan93 -scan94 -scan95 -scan96 -scan98 -scan99 -scan100 -scan101 -scan102 -scan103 -scan104 -scan126 -scan127 -scan128 \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/dtu_fit.py b/SparseNeuS_demo_v1/data/dtu_fit.py deleted file mode 100644 index e4a97d28b635a9158c49e2a651c7799ad1009021..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu_fit.py +++ /dev/null @@ -1,278 +0,0 @@ -import torch -import torch.nn as nn -import cv2 as cv -import numpy as np -import re -import os -import logging -from glob import glob - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image - -from data.scene import get_boundingbox - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! 
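# The dtu_pairs.txt block deleted above stores, for each DTU reference view, its
# highest-scoring source views: the first line is the number of reference views;
# each entry is then one line with the view id and one line of the form
# "N src_0 score_0 src_1 score_1 ...". A standalone parser sketch (dtu_general.py
# further below keeps only the ids via the same [1::2] slice):
def parse_pair_file(path):
    pairs = {}
    with open(path) as f:
        num_viewpoint = int(f.readline())
        for _ in range(num_viewpoint):
            ref_view = int(f.readline().rstrip())
            tokens = f.readline().rstrip().split()
            src_views = [int(x) for x in tokens[1::2]]     # ids sit at the odd positions
            scores = [float(x) for x in tokens[2::2]]      # scores follow each id
            pairs[ref_view] = list(zip(src_views, scores))
    return pairs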
return cam2world matrix here - - -class DtuFit: - def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[], - img_wh=[800, 600], clip_wh=[0, 0], original_img_wh=[1600, 1200], - N_rays=512, h_patch_size=5, near=425, far=900): - super(DtuFit, self).__init__() - logging.info('Load data: Begin') - - self.root_dir = root_dir - self.split = split - self.scan_id = scan_id - self.n_views = n_views - - self.near = near - self.far = far - - if self.scan_id is not None: - self.data_dir = os.path.join(self.root_dir, self.scan_id) - else: - self.data_dir = self.root_dir - - self.img_wh = img_wh - self.clip_wh = clip_wh - - if len(self.clip_wh) == 2: - self.clip_wh = self.clip_wh + self.clip_wh - - self.original_img_wh = original_img_wh - self.N_rays = N_rays - self.h_patch_size = h_patch_size # used to extract patch for supervision - self.train_img_idx = train_img_idx - self.test_img_idx = test_img_idx - - camera_dict = np.load(os.path.join(self.data_dir, 'cameras.npz'), allow_pickle=True) - self.images_list = sorted(glob(os.path.join(self.data_dir, "image/*.png"))) - # world_mat: projection matrix: world to image - self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in - range(len(self.images_list))] - - self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.images_list))]) - - # - reference image; transform the world system to the ref-camera system - self.ref_img_idx = self.train_img_idx[0] - ref_world_mat = self.world_mats_np[self.ref_img_idx] - self.ref_w2c = np.linalg.inv(load_K_Rt_from_P(None, ref_world_mat[:3, :4])[1]) - - self.all_images = [] - self.all_intrinsics = [] - self.all_w2cs = [] - - self.load_scene() # load the scene - - # ! estimate scale_mat - self.scale_mat, self.scale_factor = self.cal_scale_mat( - img_hw=[self.img_wh[1], self.img_wh[0]], - intrinsics=self.all_intrinsics[self.train_img_idx], - extrinsics=self.all_w2cs[self.train_img_idx], - near_fars=self.raw_near_fars[self.train_img_idx], - factor=1.1) - - # * after scaling and translation, unit bounding box - self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \ - self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info() - # import ipdb; ipdb.set_trace() - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - - logging.info('Load data: End') - - def load_scene(self): - - scale_x = self.img_wh[0] / self.original_img_wh[0] - scale_y = self.img_wh[1] / self.original_img_wh[1] - - for idx in range(len(self.images_list)): - image = cv.imread(self.images_list[idx]) - image = cv.resize(image, (self.img_wh[0], self.img_wh[1])) / 255. 
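# DtuFit re-anchors its extrinsics to the reference view (ref_w2c is set up above,
# and load_scene just below appends w2c @ inv(ref_w2c)), the same convention the
# loaders earlier in this diff use: the reference camera's frame plays the role of
# the world frame, so its own re-anchored w2c is the identity. Minimal sketch with
# two made-up cameras:
import numpy as np
ref_w2c = np.eye(4)
ref_w2c[:3, 3] = [0.0, 0.0, 2.0]                 # hypothetical reference extrinsic
src_w2c = np.eye(4)
src_w2c[:3, 3] = [0.5, 0.0, 2.0]                 # hypothetical source extrinsic
new_ref = ref_w2c @ np.linalg.inv(ref_w2c)       # identity: the ref view defines the frame
new_src = src_w2c @ np.linalg.inv(ref_w2c)       # source pose expressed relative to the ref camera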
- - image = image[self.clip_wh[1]:self.img_wh[1] - self.clip_wh[3], - self.clip_wh[0]:self.img_wh[0] - self.clip_wh[2]] - self.all_images.append(np.transpose(image[:, :, ::-1], (2, 0, 1))) # append [3,] - - P = self.world_mats_np[idx] - P = P[:3, :4] - intrinsics, c2w = load_K_Rt_from_P(None, P) - w2c = np.linalg.inv(c2w) - - intrinsics[:1] *= scale_x - intrinsics[1:2] *= scale_y - - intrinsics[0, 2] -= self.clip_wh[0] - intrinsics[1, 2] -= self.clip_wh[1] - - self.all_intrinsics.append(intrinsics) - # - transform from world system to ref-camera system - self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c)) - - - self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32) - self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32) - self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32) - self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2], - self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]] - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def scale_cam_info(self): - new_intrinsics = [] - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - for idx in range(len(self.all_images)): - intrinsics = self.all_intrinsics[idx] - P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat - P = P.cpu().numpy()[:3, :4] - - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - new_intrinsics.append(intrinsics) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \ - np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \ - np.stack(new_affine_mats), np.stack(new_near_fars) - - new_intrinsics = torch.from_numpy(np.float32(new_intrinsics)) - new_w2cs = torch.from_numpy(np.float32(new_w2cs)) - new_c2ws = torch.from_numpy(np.float32(new_c2ws)) - new_affine_mats = torch.from_numpy(np.float32(new_affine_mats)) - new_near_fars = torch.from_numpy(np.float32(new_near_fars)) - - return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars - - - def get_conditional_sample(self): - sample = {} - support_idxs = self.train_img_idx - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4) - sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2) - sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! 
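# When load_scene() above resizes the images from original_img_wh to img_wh and
# then crops clip_wh pixels, the intrinsics are updated to match: the first two
# rows scale with the resize and the principal point shifts by the crop offset.
# Minimal sketch with made-up numbers:
import numpy as np
K = np.array([[2890.0, 0.0, 820.0],
              [0.0, 2880.0, 610.0],
              [0.0, 0.0, 1.0]])                  # hypothetical full-resolution intrinsics (1600x1200)
scale_x, scale_y = 800 / 1600, 600 / 1200        # resize to 800x600
clip_w, clip_h = 80, 44                          # hypothetical crop offsets
K_new = K.copy()
K_new[0] *= scale_x                              # fx, skew, cx scale with the width
K_new[1] *= scale_y                              # fy, cy scale with the height
K_new[0, 2] -= clip_w                            # principal point shifts by the crop
K_new[1, 2] -= clip_h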
in world space - - sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - - return sample - - def __len__(self): - if self.split == 'train': - return self.n_views * 1000 - else: - return len(self.test_img_idx) * 1000 - - def __getitem__(self, idx): - sample = {} - - if self.split == 'train': - render_idx = self.train_img_idx[idx % self.n_views] - support_idxs = [idx for idx in self.train_img_idx if idx != render_idx] - else: - # render_idx = idx % self.n_test_images + self.n_train_images - render_idx = self.test_img_idx[idx % len(self.test_img_idx)] - support_idxs = [render_idx] - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4) - sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space - sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['img_index'] = torch.tensor(render_idx) - - # - query image - sample['query_image'] = self.all_images[render_idx] - sample['query_c2w'] = self.scaled_c2ws[render_idx] - sample['query_w2c'] = self.scaled_w2cs[render_idx] - sample['query_intrinsic'] = self.scaled_intrinsics[render_idx] - sample['query_near_far'] = self.scaled_near_fars[render_idx] - sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx]) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx] - sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32)) - - # - generate rays - if self.split == 'val' or self.split == 'test': - sample_rays = gen_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None) - else: - sample_rays = gen_random_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None, - dilated_mask=None, - importance_sample=False, - h_patch_size=self.h_patch_size - ) - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/dtu_general.py b/SparseNeuS_demo_v1/data/dtu_general.py deleted file mode 100644 index c6c7734df6072dd618ccdde71ca428f983a605e8..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu_general.py +++ /dev/null @@ -1,376 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image - -from termcolor import colored -import pdb -import random - - -def load_K_Rt_from_P(filename, 
P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class MVSDatasetDtuPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(640, 512), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.img_wh = img_wh - self.downSample = downSample - self.num_all_imgs = 49 # this preprocessed DTU dataset has 49 images - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - self.split_filepath = f'data/dtu/lists/{self.split}.txt' if split_filepath is None else split_filepath - self.pair_filepath = f'data/dtu/dtu_pairs.txt' if pair_filepath is None else pair_filepath - - print(colored("loading all scenes together", 'red')) - with open(self.split_filepath) as f: - self.scans = [line.rstrip() for line in f.readlines()] - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - - self.metas, self.ref_src_pairs = self.build_metas() # load ref-srcs view pairs info of the scene - - self.allview_ids = [i for i in range(self.num_all_imgs)] - - self.load_cam_info() # load camera info of DTU, and estimate scale_mat - - self.build_remap() - self.define_transforms() - print(f'==> image down scale: {self.downSample}') - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - - def build_remap(self): - self.remap = np.zeros(np.max(self.allview_ids) + 1).astype('int') - for i, item in enumerate(self.allview_ids): - self.remap[item] = i - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - def build_metas(self): - metas = [] - ref_src_pairs = {} - # light conditions 0-6 for training - # light condition 3 for testing (the brightest?) 
- light_idxs = [3] if 'train' not in self.split else range(7) - - with open(self.pair_filepath) as f: - num_viewpoint = int(f.readline()) - # viewpoints (49) - for _ in range(num_viewpoint): - ref_view = int(f.readline().rstrip()) - src_views = [int(x) for x in f.readline().rstrip().split()[1::2]] - - ref_src_pairs[ref_view] = src_views - - for light_idx in light_idxs: - for scan in self.scans: - with open(self.pair_filepath) as f: - num_viewpoint = int(f.readline()) - # viewpoints (49) - for _ in range(num_viewpoint): - ref_view = int(f.readline().rstrip()) - src_views = [int(x) for x in f.readline().rstrip().split()[1::2]] - - # ! only for validation - if len(self.test_ref_views) > 0 and ref_view not in self.test_ref_views: - continue - - metas += [(scan, light_idx, ref_view, src_views)] - - return metas, ref_src_pairs - - def read_cam_file(self, filename): - with open(filename) as f: - lines = [line.rstrip() for line in f.readlines()] - # extrinsics: line [1,5), 4x4 matrix - extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ') - extrinsics = extrinsics.reshape((4, 4)) - # intrinsics: line [7-10), 3x3 matrix - intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ') - intrinsics = intrinsics.reshape((3, 3)) - # depth_min & depth_interval: line 11 - depth_min = float(lines[11].split()[0]) - depth_max = depth_min + float(lines[11].split()[1]) * 192 - self.depth_interval = float(lines[11].split()[1]) - intrinsics_ = np.float32(np.diag([1, 1, 1, 1])) - intrinsics_[:3, :3] = intrinsics - return intrinsics_, extrinsics, [depth_min, depth_max] - - def load_cam_info(self): - for vid in range(self.num_all_imgs): - proj_mat_filename = os.path.join(self.root_dir, - f'Cameras/train/{vid:08d}_cam.txt') - intrinsic, extrinsic, near_far = self.read_cam_file(proj_mat_filename) - intrinsic[:2] *= 4 # * the provided intrinsics is 4x downsampled, now keep the same scale with image - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - # import ipdb; ipdb.set_trace() - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = np.ones((1200, 1600)) - # print(depth_h.shape) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - # print(depth_h.shape) - # import ipdb; ipdb.set_trace() - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return len(self.metas) - - def __getitem__(self, idx): - sample = {} - scan, light_idx, ref_view, src_views = self.metas[idx % len(self.metas)] - - # generalized, load some images at once - view_ids = [ref_view] + src_views[:self.n_views] - # * transform from world system to camera system - w2c_ref = self.all_extrinsics[self.remap[ref_view]] - w2c_ref_inv = np.linalg.inv(w2c_ref) - - image_perm = 0 # only supervised on reference view - - imgs, depths_h, masks_h = [], [], [] # full size (640, 512) - intrinsics, w2cs, near_fars = [], [], [] # record proj mats between views - mask_dilated = None - for i, vid in enumerate(view_ids): - # NOTE that the id in image file names is from 1 to 49 (not 0~48) - img_filename = os.path.join(self.root_dir, - f'Rectified/{scan}_train/rect_{vid + 1:03d}_{light_idx}_r5000.png') - depth_filename = os.path.join(self.root_dir, - f'Depths/{scan}_train/depth_map_{vid:04d}.pfm') - # print(depth_filename) - mask_filename = os.path.join(self.root_dir, - f'Masks_clean_dilated/{scan}_train/mask_{vid:04d}.png') - - img = Image.open(img_filename) - img_wh = np.round(np.array(img.size) * self.downSample).astype('int') - img = img.resize(img_wh, Image.BILINEAR) - - if os.path.exists(mask_filename) and self.clean_image: - mask_l, mask_h = self.read_mask(mask_filename) - else: - # print(self.split, "don't find mask file", mask_filename) - mask_h = np.ones([img_wh[1], img_wh[0]]) - masks_h.append(mask_h) - - if i == 0: - kernel_size = 101 # default 101 - kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128) - - if self.clean_image: - img = np.array(img) - img[mask_h < 0.5] = 0.0 - - img = self.transform(img) - - imgs += [img] - - index_mat = self.remap[vid] - near_fars.append(self.all_near_fars[index_mat]) - intrinsics.append(self.all_intrinsics[index_mat]) - - w2cs.append(self.all_extrinsics[index_mat] @ w2c_ref_inv) - - # print(depth_filename) - if os.path.exists(depth_filename): # and i == 0 - # print("file exists") - depth_l, depth_h = self.read_depth(depth_filename) - depths_h.append(depth_h) - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat(img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1) - - # ! 
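# The kernel/dilate step in __getitem__ above grows the reference-view mask with a
# large elliptical structuring element, presumably so rays just outside the object
# silhouette can still be sampled after background pixels are zeroed out. Minimal
# sketch on a synthetic mask (same default kernel size of 101):
import cv2
import numpy as np
mask_h = np.zeros((512, 640), dtype=np.float32)
mask_h[200:300, 250:350] = 1.0                   # hypothetical foreground blob
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (101, 101))
mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128)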
calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - imgs = torch.stack(imgs).float() - print(new_near_fars) - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if 'train' in self.split: - start_idx = 0 - else: - start_idx = 1 - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = scan - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(img_wh) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['meta'] = str(scan) + "_light" + str(light_idx) + "_refview" + str(ref_view) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/scene.py b/SparseNeuS_demo_v1/data/scene.py index 49183c65418338864ecabdd1af914bbb0f055579..5f34f4abf9977fba8a3f8785ef4f0c95dbd9fa1b 100644 --- a/SparseNeuS_demo_v1/data/scene.py +++ b/SparseNeuS_demo_v1/data/scene.py @@ -1,6 +1,5 @@ import numpy as np import torch -import pdb def rigid_transform(xyz, transform): diff --git a/SparseNeuS_demo_v1/evaluation/__init__.py b/SparseNeuS_demo_v1/evaluation/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/SparseNeuS_demo_v1/evaluation/clean_mesh.py b/SparseNeuS_demo_v1/evaluation/clean_mesh.py deleted file mode 100644 index ab65cc72d3be615b71ec852a7adea933355aa250..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/evaluation/clean_mesh.py +++ /dev/null @@ -1,283 +0,0 @@ -import numpy as np -import cv2 as cv -import os -from glob import glob -from scipy.io import loadmat -import trimesh -import open3d as o3d -import torch -from tqdm import tqdm - -import sys - -sys.path.append("../") - - -def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=None): - """ - generate rays in world space, for image image - :param H: - :param W: - :param intrinsics: [3,3] - :param c2ws: [4,4] - :return: - """ - device = image.device - ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' - p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 - - # normalized ndc uv coordinates, (-1, 1) - ndc_u = 2 * xs / (W - 1) - 1 - ndc_v = 2 * ys / (H - 1) - 1 - rays_ndc_uv = torch.stack([ndc_u, ndc_v], dim=-1).view(-1, 2).float().to(device) - - intrinsic_inv = torch.inverse(intrinsic) - - p = p.view(-1, 3).float().to(device) # N_rays, 3 - p = torch.matmul(intrinsic_inv[None, :3, :3], p[:, :, None]).squeeze() # N_rays, 3 - rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # N_rays, 3 - rays_v = torch.matmul(c2w[None, :3, :3], rays_v[:, :, None]).squeeze() # N_rays, 3 - rays_o = c2w[None, :3, 3].expand(rays_v.shape) # N_rays, 3 - - image = image.permute(1, 2, 0) - color = image.view(-1, 3) - depth = depth.view(-1, 1) if depth is not None else None - mask = mask.view(-1, 1) if mask is not None else torch.ones([H * W, 1]).to(device) - sample = { - 'rays_o': rays_o, - 'rays_v': rays_v, - 'rays_ndc_uv': rays_ndc_uv, - 'rays_color': color, - # 'rays_depth': depth, - 'rays_mask': mask, - 'rays_norm_XYZ_cam': p # - XYZ_cam, before multiply depth - } - if depth is not None: - sample['rays_depth'] = depth - - return sample - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if 
len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -def clean_points_by_mask(points, scan, imgs_idx=None, minimal_vis=0, mask_dilated_size=11): - cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan)) - mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan))) - n_images = 49 if scan < 83 else 64 - inside_mask = np.zeros(len(points)) - - if imgs_idx is None: - imgs_idx = [i for i in range(n_images)] - - # imgs_idx = [i for i in range(n_images)] - for i in imgs_idx: - P = cameras['world_mat_{}'.format(i)] - pts_image = np.matmul(P[None, :3, :3], points[:, :, None]).squeeze() + P[None, :3, 3] - pts_image = pts_image / pts_image[:, 2:] - pts_image = np.round(pts_image).astype(np.int32) + 1 - - mask_image = cv.imread(mask_lis[i]) - kernel_size = mask_dilated_size # default 101 - kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_image = cv.dilate(mask_image, kernel, iterations=1) - mask_image = (mask_image[:, :, 0] > 128) - - mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0) - mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1) - - in_mask = (pts_image[:, 0] >= 0) * (pts_image[:, 0] <= 1600) * (pts_image[:, 1] >= 0) * ( - pts_image[:, 1] <= 1200) > 0 - curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))] - - curr_mask = curr_mask.astype(np.float32) * in_mask - - inside_mask += curr_mask - - return inside_mask > minimal_vis - - -def clean_mesh_faces_by_mask(mesh_file, new_mesh_file, scan, imgs_idx, minimal_vis=0, mask_dilated_size=11): - old_mesh = trimesh.load(mesh_file) - old_vertices = old_mesh.vertices[:] - old_faces = old_mesh.faces[:] - mask = clean_points_by_mask(old_vertices, scan, imgs_idx, minimal_vis, mask_dilated_size) - indexes = np.ones(len(old_vertices)) * -1 - indexes = indexes.astype(np.long) - indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) - - faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]] - new_faces = old_faces[np.where(faces_mask)] - new_faces[:, 0] = indexes[new_faces[:, 0]] - new_faces[:, 1] = indexes[new_faces[:, 1]] - new_faces[:, 2] = indexes[new_faces[:, 2]] - new_vertices = old_vertices[np.where(mask)] - - new_mesh = trimesh.Trimesh(new_vertices, new_faces) - - new_mesh.export(new_mesh_file) - - -def clean_mesh_by_faces_num(mesh, faces_num=500): - old_vertices = mesh.vertices[:] - old_faces = mesh.faces[:] - - cc = trimesh.graph.connected_components(mesh.face_adjacency, min_len=faces_num) - mask = np.zeros(len(mesh.faces), dtype=np.bool) - mask[np.concatenate(cc)] = True - - indexes = np.ones(len(old_vertices)) * -1 - indexes = indexes.astype(np.long) - indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) - - faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]] - new_faces = old_faces[np.where(faces_mask)] - new_faces[:, 0] = indexes[new_faces[:, 0]] - new_faces[:, 1] = indexes[new_faces[:, 1]] - new_faces[:, 2] = indexes[new_faces[:, 2]] - new_vertices = 
old_vertices[np.where(mask)] - - new_mesh = trimesh.Trimesh(new_vertices, new_faces) - - return new_mesh - - -def clean_mesh_faces_outside_frustum(old_mesh_file, new_mesh_file, imgs_idx, H=1200, W=1600, mask_dilated_size=11, - isolated_face_num=500, keep_largest=True): - '''Remove faces of mesh which cannot be orserved by all cameras - ''' - # if path_mask_npz: - # path_save_clean = IOUtils.add_file_name_suffix(path_save_clean, '_mask') - - cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan)) - mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan))) - - mesh = trimesh.load(old_mesh_file) - intersector = trimesh.ray.ray_pyembree.RayMeshIntersector(mesh) - - all_indices = [] - chunk_size = 5120 - for i in imgs_idx: - mask_image = cv.imread(mask_lis[i]) - kernel_size = mask_dilated_size # default 101 - kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_image = cv.dilate(mask_image, kernel, iterations=1) - - P = cameras['world_mat_{}'.format(i)] - - intrinsic, pose = load_K_Rt_from_P(None, P[:3, :]) - - rays = gen_rays_from_single_image(H, W, torch.from_numpy(mask_image).permute(2, 0, 1).float(), - torch.from_numpy(intrinsic)[:3, :3].float(), - torch.from_numpy(pose).float()) - rays_o = rays['rays_o'] - rays_d = rays['rays_v'] - rays_mask = rays['rays_color'] - - rays_o = rays_o.split(chunk_size) - rays_d = rays_d.split(chunk_size) - rays_mask = rays_mask.split(chunk_size) - - for rays_o_batch, rays_d_batch, rays_mask_batch in tqdm(zip(rays_o, rays_d, rays_mask)): - rays_mask_batch = rays_mask_batch[:, 0] > 128 - rays_o_batch = rays_o_batch[rays_mask_batch] - rays_d_batch = rays_d_batch[rays_mask_batch] - - idx_faces_hits = intersector.intersects_first(rays_o_batch.cpu().numpy(), rays_d_batch.cpu().numpy()) - all_indices.append(idx_faces_hits) - - values = np.unique(np.concatenate(all_indices, axis=0)) - mask_faces = np.ones(len(mesh.faces)) - mask_faces[values[1:]] = 0 - print(f'Surfaces/Kept: {len(mesh.faces)}/{len(values)}') - - mesh_o3d = o3d.io.read_triangle_mesh(old_mesh_file) - print("removing triangles by mask") - mesh_o3d.remove_triangles_by_mask(mask_faces) - - o3d.io.write_triangle_mesh(new_mesh_file, mesh_o3d) - - # # clean meshes - new_mesh = trimesh.load(new_mesh_file) - cc = trimesh.graph.connected_components(new_mesh.face_adjacency, min_len=500) - mask = np.zeros(len(new_mesh.faces), dtype=np.bool) - mask[np.concatenate(cc)] = True - new_mesh.update_faces(mask) - new_mesh.remove_unreferenced_vertices() - new_mesh.export(new_mesh_file) - - # meshes = new_mesh.split(only_watertight=False) - # - # if not keep_largest: - # meshes = [mesh for mesh in meshes if len(mesh.faces) > isolated_face_num] - # # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - # merged_mesh = trimesh.util.concatenate(meshes) - # merged_mesh.export(new_mesh_file) - # else: - # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - # new_mesh.export(new_mesh_file) - - o3d.io.write_triangle_mesh(new_mesh_file.replace(".ply", "_raw.ply"), mesh_o3d) - print("finishing removing triangles") - - -def clean_outliers(old_mesh_file, new_mesh_file): - new_mesh = trimesh.load(old_mesh_file) - - meshes = new_mesh.split(only_watertight=False) - new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - - new_mesh.export(new_mesh_file) - - -if __name__ == "__main__": - - scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122] - - mask_kernel_size = 11 - - imgs_idx = [0, 1, 2] - # imgs_idx = [42, 43, 
44] - # imgs_idx = [1, 8, 9] - - DTU_DIR = "/home/xiaoxiao/dataset/DTU_IDR/DTU" - # DTU_DIR = "/userhome/cs/xxlong/dataset/DTU_IDR/DTU" - - base_path = "/home/xiaoxiao/Workplace/nerf_reconstruction/Volume_NeuS/neus_camsys/exp/dtu/evaluation_23_24_33_new/volsdf" - - for scan in scans: - print("processing scan%d" % scan) - dir_path = os.path.join(base_path, "scan%d" % scan) - - old_mesh_file = glob(os.path.join(dir_path, "*.ply"))[0] - - clean_mesh_file = os.path.join(dir_path, "clean_%03d.ply" % scan) - final_mesh_file = os.path.join(dir_path, "final_%03d.ply" % scan) - - clean_mesh_faces_by_mask(old_mesh_file, clean_mesh_file, scan, imgs_idx, minimal_vis=1, - mask_dilated_size=mask_kernel_size) - clean_mesh_faces_outside_frustum(clean_mesh_file, final_mesh_file, imgs_idx, mask_dilated_size=mask_kernel_size) - - print("finish processing scan%d" % scan) diff --git a/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py b/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py deleted file mode 100644 index a60230705ab3f8c7c2a0ed64a20634c7ab4d2eea..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py +++ /dev/null @@ -1,369 +0,0 @@ -import numpy as np -import open3d as o3d -import sklearn.neighbors as skln -from tqdm import tqdm -from scipy.io import loadmat -import multiprocessing as mp -import argparse, os, sys -import cv2 as cv - -from pathlib import Path - - -def get_path_components(path): - path = Path(path) - ppath = str(path.parent) - stem = str(path.stem) - ext = str(path.suffix) - return ppath, stem, ext - - -def sample_single_tri(input_): - n1, n2, v1, v2, tri_vert = input_ - c = np.mgrid[:n1 + 1, :n2 + 1] - c += 0.5 - c[0] /= max(n1, 1e-7) - c[1] /= max(n2, 1e-7) - c = np.transpose(c, (1, 2, 0)) - k = c[c.sum(axis=-1) < 1] # m2 - q = v1 * k[:, :1] + v2 * k[:, 1:] + tri_vert - return q - - -def write_vis_pcd(file, points, colors): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(points) - pcd.colors = o3d.utility.Vector3dVector(colors) - o3d.io.write_point_cloud(file, pcd) - - -def eval_cloud(args, num_cpu_cores=-1): - mp.freeze_support() - os.makedirs(args.vis_out_dir, exist_ok=True) - - thresh = args.downsample_density - if args.mode == 'mesh': - pbar = tqdm(total=9) - pbar.set_description('read data mesh') - data_mesh = o3d.io.read_triangle_mesh(args.data) - - vertices = np.asarray(data_mesh.vertices) - triangles = np.asarray(data_mesh.triangles) - tri_vert = vertices[triangles] - - pbar.update(1) - pbar.set_description('sample pcd from mesh') - v1 = tri_vert[:, 1] - tri_vert[:, 0] - v2 = tri_vert[:, 2] - tri_vert[:, 0] - l1 = np.linalg.norm(v1, axis=-1, keepdims=True) - l2 = np.linalg.norm(v2, axis=-1, keepdims=True) - area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True) - non_zero_area = (area2 > 0)[:, 0] - l1, l2, area2, v1, v2, tri_vert = [ - arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert] - ] - thr = thresh * np.sqrt(l1 * l2 / area2) - n1 = np.floor(l1 / thr) - n2 = np.floor(l2 / thr) - - with mp.Pool() as mp_pool: - new_pts = mp_pool.map(sample_single_tri, - ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in - range(len(n1))), chunksize=1024) - - new_pts = np.concatenate(new_pts, axis=0) - data_pcd = np.concatenate([vertices, new_pts], axis=0) - - elif args.mode == 'pcd': - pbar = tqdm(total=8) - pbar.set_description('read data pcd') - data_pcd_o3d = o3d.io.read_point_cloud(args.data) - data_pcd = np.asarray(data_pcd_o3d.points) - - pbar.update(1) - 
pbar.set_description('random shuffle pcd index') - shuffle_rng = np.random.default_rng() - shuffle_rng.shuffle(data_pcd, axis=0) - - pbar.update(1) - pbar.set_description('downsample pcd') - nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=num_cpu_cores) - nn_engine.fit(data_pcd) - rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False) - mask = np.ones(data_pcd.shape[0], dtype=np.bool_) - for curr, idxs in enumerate(rnn_idxs): - if mask[curr]: - mask[idxs] = 0 - mask[curr] = 1 - data_down = data_pcd[mask] - - pbar.update(1) - pbar.set_description('masking data pcd') - obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat') - ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']] - BB = BB.astype(np.float32) - - patch = args.patch_size - inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3 - data_in = data_down[inbound] - - data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32) - grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3 - data_grid_in = data_grid[grid_inbound] - in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_) - data_in_obs = data_in[grid_inbound][in_obs] - - pbar.update(1) - pbar.set_description('read STL pcd') - stl_pcd = o3d.io.read_point_cloud(args.gt) - stl = np.asarray(stl_pcd.points) - - pbar.update(1) - pbar.set_description('compute data2stl') - nn_engine.fit(stl) - dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True) - max_dist = args.max_dist - mean_d2s = dist_d2s[dist_d2s < max_dist].mean() - - pbar.update(1) - pbar.set_description('compute stl2data') - ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P'] - - stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1) - above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0 - stl_above = stl[above] - - nn_engine.fit(data_in) - dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True) - mean_s2d = dist_s2d[dist_s2d < max_dist].mean() - - pbar.update(1) - pbar.set_description('visualize error') - vis_dist = args.visualize_threshold - R = np.array([[1, 0, 0]], dtype=np.float64) - G = np.array([[0, 1, 0]], dtype=np.float64) - B = np.array([[0, 0, 1]], dtype=np.float64) - W = np.array([[1, 1, 1]], dtype=np.float64) - data_color = np.tile(B, (data_down.shape[0], 1)) - data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist - data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha) - data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color) - stl_color = np.tile(B, (stl.shape[0], 1)) - stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist - stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha) - stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color) - - pbar.update(1) - pbar.set_description('done') - pbar.close() - over_all = (mean_d2s + mean_s2d) / 2 - print(f'ean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d} over_all: {over_all}; .') - - pparent, stem, ext = get_path_components(args.data) - if args.log is None: - path_log = os.path.join(pparent, 'eval_result.txt') - else: - path_log = args.log - with open(path_log, 
'a+') as fLog: - fLog.write(f'mean_d2gt {np.round(mean_d2s, 3)} ' - f'mean_gt2d {np.round(mean_s2d, 3)} ' - f'Over_all {np.round(over_all, 3)} ' - f'[{stem}] \n') - - return over_all, mean_d2s, mean_s2d - - -if __name__ == '__main__': - from glob import glob - - mp.freeze_support() - - parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default='data_in.ply') - parser.add_argument('--gt', type=str, help='ground truth') - parser.add_argument('--scan', type=int, default=1) - parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd']) - parser.add_argument('--dataset_dir', type=str, default='/dataset/dtu_official/SampleSet/MVS_Data') - parser.add_argument('--vis_out_dir', type=str, default='.') - parser.add_argument('--downsample_density', type=float, default=0.2) - parser.add_argument('--patch_size', type=float, default=60) - parser.add_argument('--max_dist', type=float, default=20) - parser.add_argument('--visualize_threshold', type=float, default=10) - parser.add_argument('--log', type=str, default=None) - args = parser.parse_args() - - base_dir = "./exp" - - GT_DIR = "./gt_pcd" - - scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122] - - for scan in scans: - - print("processing scan%d" % scan) - - args.data = os.path.join(base_dir, "scan{}".format(scan), "final_%03d.ply" % scan) - - if not os.path.exists(args.data): - continue - - args.gt = os.path.join(GT_DIR, "stl%03d_total.ply" % scan) - args.vis_out_dir = os.path.join(base_dir, "scan{}".format(scan)) - args.scan = scan - os.makedirs(args.vis_out_dir, exist_ok=True) - - dist_thred1 = 1 - dist_thred2 = 2 - - thresh = args.downsample_density - - if args.mode == 'mesh': - pbar = tqdm(total=9) - pbar.set_description('read data mesh') - data_mesh = o3d.io.read_triangle_mesh(args.data) - - vertices = np.asarray(data_mesh.vertices) - triangles = np.asarray(data_mesh.triangles) - tri_vert = vertices[triangles] - - pbar.update(1) - pbar.set_description('sample pcd from mesh') - v1 = tri_vert[:, 1] - tri_vert[:, 0] - v2 = tri_vert[:, 2] - tri_vert[:, 0] - l1 = np.linalg.norm(v1, axis=-1, keepdims=True) - l2 = np.linalg.norm(v2, axis=-1, keepdims=True) - area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True) - non_zero_area = (area2 > 0)[:, 0] - l1, l2, area2, v1, v2, tri_vert = [ - arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert] - ] - thr = thresh * np.sqrt(l1 * l2 / area2) - n1 = np.floor(l1 / thr) - n2 = np.floor(l2 / thr) - - with mp.Pool() as mp_pool: - new_pts = mp_pool.map(sample_single_tri, - ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in - range(len(n1))), chunksize=1024) - - new_pts = np.concatenate(new_pts, axis=0) - data_pcd = np.concatenate([vertices, new_pts], axis=0) - - elif args.mode == 'pcd': - pbar = tqdm(total=8) - pbar.set_description('read data pcd') - data_pcd_o3d = o3d.io.read_point_cloud(args.data) - data_pcd = np.asarray(data_pcd_o3d.points) - - pbar.update(1) - pbar.set_description('random shuffle pcd index') - shuffle_rng = np.random.default_rng() - shuffle_rng.shuffle(data_pcd, axis=0) - - pbar.update(1) - pbar.set_description('downsample pcd') - nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1) - nn_engine.fit(data_pcd) - rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False) - mask = np.ones(data_pcd.shape[0], dtype=np.bool_) - for curr, idxs in enumerate(rnn_idxs): - if mask[curr]: - mask[idxs] = 0 - mask[curr] = 1 
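-        # The masking loop above is a greedy radius-based downsampling: each point that is
-        # still unmasked suppresses all of its neighbours within `thresh`, so the surviving
-        # points are roughly `thresh`-separated before the chamfer distances are computed.
-        # An equivalent standalone sketch (helper name and toy input are illustrative only):
-        #
-        #     def greedy_downsample(points, radius):
-        #         nn = skln.NearestNeighbors(radius=radius, algorithm='kd_tree').fit(points)
-        #         neighbours = nn.radius_neighbors(points, radius=radius, return_distance=False)
-        #         keep = np.ones(len(points), dtype=np.bool_)
-        #         for i, idxs in enumerate(neighbours):
-        #             if keep[i]:
-        #                 keep[idxs] = False
-        #                 keep[i] = True
-        #         return points[keep]
-        #
-        #     downsampled = greedy_downsample(np.random.rand(1000, 3), radius=0.2)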
- data_down = data_pcd[mask] - - pbar.update(1) - pbar.set_description('masking data pcd') - obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat') - ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']] - BB = BB.astype(np.float32) - - patch = args.patch_size - inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3 - data_in = data_down[inbound] - - data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32) - grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3 - data_grid_in = data_grid[grid_inbound] - in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_) - data_in_obs = data_in[grid_inbound][in_obs] - - pbar.update(1) - pbar.set_description('read STL pcd') - stl_pcd = o3d.io.read_point_cloud(args.gt) - stl = np.asarray(stl_pcd.points) - - pbar.update(1) - pbar.set_description('compute data2stl') - nn_engine.fit(stl) - dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True) - max_dist = args.max_dist - mean_d2s = dist_d2s[dist_d2s < max_dist].mean() - - precision_1 = len(dist_d2s[dist_d2s < dist_thred1]) / len(dist_d2s) - precision_2 = len(dist_d2s[dist_d2s < dist_thred2]) / len(dist_d2s) - - pbar.update(1) - pbar.set_description('compute stl2data') - ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P'] - - stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1) - above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0 - - stl_above = stl[above] - - nn_engine.fit(data_in) - dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True) - mean_s2d = dist_s2d[dist_s2d < max_dist].mean() - - recall_1 = len(dist_s2d[dist_s2d < dist_thred1]) / len(dist_s2d) - recall_2 = len(dist_s2d[dist_s2d < dist_thred2]) / len(dist_s2d) - - pbar.update(1) - pbar.set_description('visualize error') - vis_dist = args.visualize_threshold - R = np.array([[1, 0, 0]], dtype=np.float64) - G = np.array([[0, 1, 0]], dtype=np.float64) - B = np.array([[0, 0, 1]], dtype=np.float64) - W = np.array([[1, 1, 1]], dtype=np.float64) - data_color = np.tile(B, (data_down.shape[0], 1)) - data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist - data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha) - data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color) - stl_color = np.tile(B, (stl.shape[0], 1)) - stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist - stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha) - stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color) - - pbar.update(1) - pbar.set_description('done') - pbar.close() - over_all = (mean_d2s + mean_s2d) / 2 - - fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6) - fscore_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2 + 1e-6) - - print(f'over_all: {over_all}; mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}.') - print(f'precision_1mm: {precision_1}; recall_1mm: {recall_1}; fscore_1mm: {fscore_1}') - print(f'precision_2mm: {precision_2}; recall_2mm: {recall_2}; fscore_2mm: {fscore_2}') - - pparent, stem, ext = get_path_components(args.data) - if args.log is None: - path_log = os.path.join(pparent, 
'eval_result.txt') - else: - path_log = args.log - with open(path_log, 'w+') as fLog: - fLog.write(f'over_all {np.round(over_all, 3)} ' - f'mean_d2gt {np.round(mean_d2s, 3)} ' - f'mean_gt2d {np.round(mean_s2d, 3)} \n' - f'precision_1mm {np.round(precision_1, 3)} ' - f'recall_1mm {np.round(recall_1, 3)} ' - f'fscore_1mm {np.round(fscore_1, 3)} \n' - f'precision_2mm {np.round(precision_2, 3)} ' - f'recall_2mm {np.round(recall_2, 3)} ' - f'fscore_2mm {np.round(fscore_2, 3)} \n' - f'[{stem}] \n') diff --git a/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py b/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py index 8d2d26c5339e7fc057160239aeb0822542f910f7..7d09a56f3d66935ca26b2690ed637dfb6f51049c 100644 --- a/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py +++ b/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py @@ -1,15 +1,12 @@ -import torch -import torch.nn.functional as F -from torch.utils.data import DataLoader -import argparse import os import logging +import argparse import numpy as np -import cv2 as cv -import trimesh from shutil import copyfile +import torch +from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter -from icecream import ic +from rich import print from tqdm import tqdm from pyhocon import ConfigFactory @@ -17,23 +14,12 @@ import sys sys.path.append(os.path.dirname(__file__)) from models.fields import SingleVarianceNetwork - from models.featurenet import FeatureNet - from models.trainer_generic import GenericTrainer - from models.sparse_sdf_network import SparseSdfNetwork - from models.rendering_network import GeneralRenderingNetwork - -from datetime import datetime - -from data.dtu_general import MVSDatasetDtuPerView - -from utils.training_utils import tocuda from data.blender_general_narrow_all_eval_new_data import BlenderPerView -from termcolor import colored from datetime import datetime @@ -45,14 +31,14 @@ class Runner: self.device = torch.device('cuda:%d' % local_rank) # self.device = torch.device('cuda') self.num_devices = torch.cuda.device_count() - self.is_continue = is_continue + self.is_continue = is_continue or (mode == "export_mesh") self.is_restore = is_restore self.restore_lod0 = restore_lod0 self.mode = mode self.model_list = [] self.logger = logging.getLogger('exp_logger') - print(colored("detected %d GPUs" % self.num_devices, "red")) + print("detected %d GPUs" % self.num_devices) self.conf_path = conf_path self.conf = ConfigFactory.parse_file(conf_path) @@ -63,7 +49,7 @@ class Runner: else: self.base_exp_dir = self.conf['general.base_exp_dir'] self.conf['general.base_exp_dir'] = self.base_exp_dir # jha use this when testing - print(colored("base_exp_dir: " + self.base_exp_dir, 'yellow')) + print("base_exp_dir: " + self.base_exp_dir) os.makedirs(self.base_exp_dir, exist_ok=True) self.iter_step = 0 self.val_step = 0 @@ -121,12 +107,11 @@ class Runner: **self.conf['model.rendering_network_lod1']).to(self.device) if self.mode == 'export_mesh' or self.mode == 'val': # base_exp_dir_to_store = os.path.join(self.base_exp_dir, '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.now())) - print("save mesh to:", os.path.join("../", args.specific_dataset_name)) base_exp_dir_to_store = os.path.join("../", args.specific_dataset_name) #"../gradio_tmp" # MODIFIED else: base_exp_dir_to_store = self.base_exp_dir - print(colored(f"Store in: {base_exp_dir_to_store}", "blue")) + print(f"Store in: {base_exp_dir_to_store}") # Renderer model self.trainer = GenericTrainer( self.rendering_network_outside, @@ -149,7 +134,7 @@ class Runner: # Load 
checkpoint latest_model_name = None - if is_continue: + if self.is_continue: model_list_raw = os.listdir(os.path.join(self.base_exp_dir, 'checkpoints')) model_list = [] for model_name in model_list_raw: @@ -238,7 +223,7 @@ class Runner: epochs = int(1 + res_step // len(dataloader)) self.adjust_learning_rate() - print(colored("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr']), "yellow")) + print("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr'])) background_rgb = None if self.use_white_bkgd: @@ -247,7 +232,7 @@ class Runner: for epoch_i in range(epochs): - print(colored("current epoch %d" % epoch_i, 'red')) + print("current epoch %d" % epoch_i) dataloader = tqdm(dataloader) for batch in dataloader: @@ -365,8 +350,8 @@ class Runner: losses_lod1['color_fine_loss'].mean() if losses_lod1 is not None else 0, self.optimizer.param_groups[0]['lr'])) - print(colored('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format( - alpha_inter_ratio_lod0, alpha_inter_ratio_lod1), 'green')) + print('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format( + alpha_inter_ratio_lod0, alpha_inter_ratio_lod1)) if losses_lod0 is not None: # print("[TEST]: weights_sum in print", losses_lod0['weights_sum'].mean()) @@ -469,7 +454,7 @@ class Runner: # 3. load the new state dict network.load_state_dict(pretrained_dict) except: - print(colored(comment + " load fails", 'yellow')) + print(comment + " load fails") checkpoint = torch.load(os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name), map_location=self.device) @@ -497,7 +482,7 @@ class Runner: try: self.optimizer.load_state_dict(checkpoint['optimizer']) except: - print(colored("load optimizer fails", "yellow")) + print("load optimizer fails") self.iter_step = checkpoint['iter_step'] self.val_step = checkpoint['val_step'] if 'val_step' in checkpoint.keys() else 0 @@ -532,15 +517,10 @@ class Runner: torch.save(checkpoint, os.path.join(self.base_exp_dir, 'checkpoints', 'ckpt_{:0>6d}.pth'.format(self.iter_step))) - def validate(self, idx=-1, resolution_level=-1): + def validate(self, resolution_level=-1): # validate image - - ic(self.iter_step, idx) + print("iter_step: ", self.iter_step) self.logger.info('Validate begin') - - if idx < 0: - idx = self.val_step - # idx = np.random.randint(len(self.val_dataset)) self.val_step += 1 try: @@ -576,16 +556,9 @@ class Runner: ) - def export_mesh(self, idx=-1, resolution_level=-1): - # validate image - - ic(self.iter_step, idx) + def export_mesh(self, resolution_level=-1): + print("iter_step: ", self.iter_step) self.logger.info('Validate begin') - import time - start1 = time.time() - if idx < 0: - idx = self.val_step - # idx = np.random.randint(len(self.val_dataset)) self.val_step += 1 try: @@ -598,7 +571,6 @@ class Runner: background_rgb = None if self.use_white_bkgd: - # background_rgb = torch.ones([1, 3]).to(self.device) background_rgb = 1.0 batch['batch_idx'] = torch.tensor([x for x in range(self.batch_size)]) @@ -609,8 +581,6 @@ class Runner: else: alpha_inter_ratio_lod0 = 1. 
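             # alpha_inter_ratio_* is the cos-annealing weight used by the renderer: 0.0 favours the
             # relaxed approximation of the ray/normal cosine, 1.0 uses the true (clamped) cosine.
             # In this branch lod0 is pinned to 1.0, while lod1 is still read from its schedule below.
             # A plausible ramp, assuming a NeuS-style linear schedule (illustrative only):
             #     ratio = min(1.0, max(0.0, (iter_step - anneal_start) / (anneal_end - anneal_start)))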
alpha_inter_ratio_lod1 = self.get_alpha_inter_ratio(self.anneal_start_lod1, self.anneal_end_lod1) - end1 = time.time() - print("time for getting data", end1 - start1) self.trainer( batch, background_rgb=background_rgb, diff --git a/SparseNeuS_demo_v1/models/patch_projector.py b/SparseNeuS_demo_v1/models/patch_projector.py index cf9ca424c588e49d754988814233069b2cf127fa..24bb64527a1f9a9a1c6db8cd290d38f65b63b6d4 100644 --- a/SparseNeuS_demo_v1/models/patch_projector.py +++ b/SparseNeuS_demo_v1/models/patch_projector.py @@ -208,4 +208,4 @@ def normalize(flow, h, w, clamp=None): def build_patch_offset(h_patch_size): offsets = torch.arange(-h_patch_size, h_patch_size + 1) - return torch.stack(torch.meshgrid(offsets, offsets)[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2 + return torch.stack(torch.meshgrid(offsets, offsets, indexing="ij")[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2 diff --git a/SparseNeuS_demo_v1/models/rays.py b/SparseNeuS_demo_v1/models/rays.py index a31df93e727fd79adaaa3e934c67378b611d4ee0..aa45b18df32adc34124687fb06495c1652cb1678 100644 --- a/SparseNeuS_demo_v1/models/rays.py +++ b/SparseNeuS_demo_v1/models/rays.py @@ -1,12 +1,7 @@ -import os, torch, cv2, re +import os, torch import numpy as np -from PIL import Image import torch.nn.functional as F -import torchvision.transforms as T - -from random import random - def build_patch_offset(h_patch_size): offsets = torch.arange(-h_patch_size, h_patch_size + 1) @@ -24,7 +19,7 @@ def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=Non """ device = image.device ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 # normalized ndc uv coordinates, (-1, 1) @@ -86,7 +81,7 @@ def gen_random_rays_from_single_image(H, W, N_rays, image, intrinsic, c2w, depth pixels_y_1 = torch.randint(low=0, high=H, size=[N_rays // 4]) ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys], dim=-1) # H, W, 2 try: @@ -292,7 +287,7 @@ def gen_rays_between(c2w_0, c2w_1, intrinsic, ratio, H, W, resolution_level=1): l = resolution_level tx = torch.linspace(0, W - 1, W // l) ty = torch.linspace(0, H - 1, H // l) - pixels_x, pixels_y = torch.meshgrid(tx, ty) + pixels_x, pixels_y = torch.meshgrid(tx, ty, indexing="ij") p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).to(device) # W, H, 3 intrinsic_inv = torch.inverse(intrinsic[:3, :3]) diff --git a/SparseNeuS_demo_v1/models/render_utils.py b/SparseNeuS_demo_v1/models/render_utils.py index e1d3d8fc4ca7bf5e306733a213dec96a517a71c7..c14d5761234a16a19ed10509f9f0972adaf04c9a 100644 --- a/SparseNeuS_demo_v1/models/render_utils.py +++ b/SparseNeuS_demo_v1/models/render_utils.py @@ -1,14 +1,8 @@ import torch import torch.nn as nn import torch.nn.functional as F -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic from ops.back_project import cam2pixel -import pdb def sample_pdf(bins, weights, n_samples, det=False): diff --git a/SparseNeuS_demo_v1/models/sparse_neus_renderer.py b/SparseNeuS_demo_v1/models/sparse_neus_renderer.py index 8015669f349f5b61ca1cb234ec2fcdf71cd10407..96ffc7b547e0f83a177a81f36be38375d9cd26fb 100644 --- 
a/SparseNeuS_demo_v1/models/sparse_neus_renderer.py +++ b/SparseNeuS_demo_v1/models/sparse_neus_renderer.py @@ -10,7 +10,6 @@ import torch.nn.functional as F import numpy as np import logging import mcubes -import trimesh from icecream import ic from models.render_utils import sample_pdf @@ -21,10 +20,6 @@ from models.fast_renderer import FastRenderer from models.patch_projector import PatchProjector -from models.rays import gen_rays_between - -import pdb - class SparseNeuSRenderer(nn.Module): """ @@ -898,7 +893,7 @@ class SparseNeuSRenderer(nn.Module): for xi, xs in enumerate(X): for yi, ys in enumerate(Y): for zi, zs in enumerate(Z): - xx, yy, zz = torch.meshgrid(xs, ys, zs) + xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing="ij") pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # ! attention, the query function is different for extract geometry and fields @@ -958,7 +953,7 @@ class SparseNeuSRenderer(nn.Module): with torch.no_grad(): ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 intrinsics_inv = torch.inverse(intrinsics) diff --git a/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py b/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py deleted file mode 100644 index 34e22aa312312b4fc7e8225e15f1eea5a2de71d1..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py +++ /dev/null @@ -1,992 +0,0 @@ -""" -The codes are heavily borrowed from NeuS -""" - -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic -from models.render_utils import sample_pdf - -from models.projector import Projector -from tsparse.torchsparse_utils import sparse_to_dense_channel - -from models.fast_renderer import FastRenderer - -from models.patch_projector import PatchProjector - -from models.rays import gen_rays_between - -import pdb - - -class SparseNeuSRenderer(nn.Module): - """ - conditional neus render; - optimize on normalized world space; - warped by nn.Module to support DataParallel traning - """ - - def __init__(self, - rendering_network_outside, - sdf_network, - variance_network, - rendering_network, - n_samples, - n_importance, - n_outside, - perturb, - alpha_type='div', - conf=None - ): - super(SparseNeuSRenderer, self).__init__() - - self.conf = conf - self.base_exp_dir = conf['general.base_exp_dir'] - - # network setups - self.rendering_network_outside = rendering_network_outside - self.sdf_network = sdf_network - self.variance_network = variance_network - self.rendering_network = rendering_network - - self.n_samples = n_samples - self.n_importance = n_importance - self.n_outside = n_outside - self.perturb = perturb - self.alpha_type = alpha_type - - self.rendering_projector = Projector() # used to obtain features for generalized rendering - - self.h_patch_size = self.conf.get_int('model.h_patch_size', default=3) - self.patch_projector = PatchProjector(self.h_patch_size) - - self.ray_tracer = FastRenderer() # ray_tracer to extract depth maps from sdf_volume - - # - fitted rendering or general rendering - try: - self.if_fitted_rendering = self.sdf_network.if_fitted_rendering - except: - self.if_fitted_rendering = False - - def up_sample(self, 
rays_o, rays_d, z_vals, sdf, n_importance, inv_variance, - conditional_valid_mask_volume=None): - device = rays_o.device - batch_size, n_samples = z_vals.shape - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] # n_rays, n_samples, 3 - - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(batch_size, n_samples) - pts_mask = pts_mask[:, :-1] * pts_mask[:, 1:] # [batch_size, n_samples-1] - else: - pts_mask = torch.ones([batch_size, n_samples]).to(pts.device) - - sdf = sdf.reshape(batch_size, n_samples) - prev_sdf, next_sdf = sdf[:, :-1], sdf[:, 1:] - prev_z_vals, next_z_vals = z_vals[:, :-1], z_vals[:, 1:] - mid_sdf = (prev_sdf + next_sdf) * 0.5 - dot_val = None - if self.alpha_type == 'uniform': - dot_val = torch.ones([batch_size, n_samples - 1]) * -1.0 - else: - dot_val = (next_sdf - prev_sdf) / (next_z_vals - prev_z_vals + 1e-5) - prev_dot_val = torch.cat([torch.zeros([batch_size, 1]).to(device), dot_val[:, :-1]], dim=-1) - dot_val = torch.stack([prev_dot_val, dot_val], dim=-1) - dot_val, _ = torch.min(dot_val, dim=-1, keepdim=False) - dot_val = dot_val.clip(-10.0, 0.0) * pts_mask - dist = (next_z_vals - prev_z_vals) - prev_esti_sdf = mid_sdf - dot_val * dist * 0.5 - next_esti_sdf = mid_sdf + dot_val * dist * 0.5 - prev_cdf = torch.sigmoid(prev_esti_sdf * inv_variance) - next_cdf = torch.sigmoid(next_esti_sdf * inv_variance) - alpha_sdf = (prev_cdf - next_cdf + 1e-5) / (prev_cdf + 1e-5) - - alpha = alpha_sdf - - # - apply pts_mask - alpha = pts_mask * alpha - - weights = alpha * torch.cumprod( - torch.cat([torch.ones([batch_size, 1]).to(device), 1. - alpha + 1e-7], -1), -1)[:, :-1] - - z_samples = sample_pdf(z_vals, weights, n_importance, det=True).detach() - return z_samples - - def cat_z_vals(self, rays_o, rays_d, z_vals, new_z_vals, sdf, lod, - sdf_network, gru_fusion, - # * related to conditional feature - conditional_volume=None, - conditional_valid_mask_volume=None - ): - device = rays_o.device - batch_size, n_samples = z_vals.shape - _, n_importance = new_z_vals.shape - pts = rays_o[:, None, :] + rays_d[:, None, :] * new_z_vals[..., :, None] - - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(batch_size, n_importance) - pts_mask_bool = (pts_mask > 0).view(-1) - else: - pts_mask = torch.ones([batch_size, n_importance]).to(pts.device) - - new_sdf = torch.ones([batch_size * n_importance, 1]).to(pts.dtype).to(device) * 100 - - if torch.sum(pts_mask) > 1: - new_outputs = sdf_network.sdf(pts.reshape(-1, 3)[pts_mask_bool], conditional_volume, lod=lod) - new_sdf[pts_mask_bool] = new_outputs['sdf_pts_scale%d' % lod] # .reshape(batch_size, n_importance) - - new_sdf = new_sdf.view(batch_size, n_importance) - - z_vals = torch.cat([z_vals, new_z_vals], dim=-1) - sdf = torch.cat([sdf, new_sdf], dim=-1) - - z_vals, index = torch.sort(z_vals, dim=-1) - xx = torch.arange(batch_size)[:, None].expand(batch_size, n_samples + n_importance).reshape(-1) - index = index.reshape(-1) - sdf = sdf[(xx, index)].reshape(batch_size, n_samples + n_importance) - - return z_vals, sdf - - @torch.no_grad() - def get_pts_mask_for_conditional_volume(self, pts, mask_volume): - """ - - :param pts: [N, 3] - :param mask_volume: [1, 1, X, Y, Z] - :return: - """ - num_pts = pts.shape[0] - pts = pts.view(1, 1, 1, num_pts, 3) # - should be in range (-1, 1) 
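-        # F.grid_sample on a 5-D volume [N, C, D, H, W] expects the sampling grid's last
-        # dimension ordered as (x, y, z), where x indexes W (the last spatial axis) and z
-        # indexes D (the first). The points are stored as (X, Y, Z) in the same order as the
-        # volume's spatial dims, so the flip below converts a point (px, py, pz) into the
-        # (pz, py, px) layout that grid_sample expects; mode='nearest' then looks up the
-        # nearest voxel's mask value without interpolation.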
- - pts = torch.flip(pts, dims=[-1]) - - pts_mask = F.grid_sample(mask_volume, pts, mode='nearest') # [1, c, 1, 1, num_pts] - pts_mask = pts_mask.view(-1, num_pts).permute(1, 0).contiguous() # [num_pts, 1] - - return pts_mask - - def render_core(self, - rays_o, - rays_d, - z_vals, - sample_dist, - lod, - sdf_network, - rendering_network, - background_alpha=None, # - no use here - background_sampled_color=None, # - no use here - background_rgb=None, # - no use here - alpha_inter_ratio=0.0, - # * related to conditional feature - conditional_volume=None, - conditional_valid_mask_volume=None, - # * 2d feature maps - feature_maps=None, - color_maps=None, - w2cs=None, - intrinsics=None, - img_wh=None, - query_c2w=None, # - used for testing - if_general_rendering=True, - if_render_with_grad=True, - # * used for blending mlp rendering network - img_index=None, - rays_uv=None, - # * used for clear bg and fg - bg_num=0 - ): - device = rays_o.device - N_rays = rays_o.shape[0] - _, n_samples = z_vals.shape - dists = z_vals[..., 1:] - z_vals[..., :-1] - dists = torch.cat([dists, torch.Tensor([sample_dist]).expand(dists[..., :1].shape).to(device)], -1) - - mid_z_vals = z_vals + dists * 0.5 - mid_dists = mid_z_vals[..., 1:] - mid_z_vals[..., :-1] - - pts = rays_o[:, None, :] + rays_d[:, None, :] * mid_z_vals[..., :, None] # n_rays, n_samples, 3 - dirs = rays_d[:, None, :].expand(pts.shape) - - pts = pts.reshape(-1, 3) - dirs = dirs.reshape(-1, 3) - - # * if conditional_volume is restored from sparse volume, need mask for pts - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts, conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(N_rays, n_samples).float().detach() - pts_mask_bool = (pts_mask > 0).view(-1) - - if torch.sum(pts_mask_bool.float()) < 1: # ! 
when render out image, may meet this problem - pts_mask_bool[:100] = True - - else: - pts_mask = torch.ones([N_rays, n_samples]).to(pts.device) - # import ipdb; ipdb.set_trace() - # pts_valid = pts[pts_mask_bool] - sdf_nn_output = sdf_network.sdf(pts[pts_mask_bool], conditional_volume, lod=lod) - - sdf = torch.ones([N_rays * n_samples, 1]).to(pts.dtype).to(device) * 100 - sdf[pts_mask_bool] = sdf_nn_output['sdf_pts_scale%d' % lod] # [N_rays*n_samples, 1] - feature_vector_valid = sdf_nn_output['sdf_features_pts_scale%d' % lod] - feature_vector = torch.zeros([N_rays * n_samples, feature_vector_valid.shape[1]]).to(pts.dtype).to(device) - feature_vector[pts_mask_bool] = feature_vector_valid - - # * estimate alpha from sdf - gradients = torch.zeros([N_rays * n_samples, 3]).to(pts.dtype).to(device) - # import ipdb; ipdb.set_trace() - gradients[pts_mask_bool] = sdf_network.gradient( - pts[pts_mask_bool], conditional_volume, lod=lod).squeeze() - - sampled_color_mlp = None - rendering_valid_mask_mlp = None - sampled_color_patch = None - rendering_patch_mask = None - - if self.if_fitted_rendering: # used for fine-tuning - position_latent = sdf_nn_output['sampled_latent_scale%d' % lod] - sampled_color_mlp = torch.zeros([N_rays * n_samples, 3]).to(pts.dtype).to(device) - sampled_color_mlp_mask = torch.zeros([N_rays * n_samples, 1]).to(pts.dtype).to(device) - - # - extract pixel - pts_pixel_color, pts_pixel_mask = self.patch_projector.pixel_warp( - pts[pts_mask_bool][:, None, :], color_maps, intrinsics, - w2cs, img_wh=None) # [N_rays * n_samples,1, N_views, 3] , [N_rays*n_samples, 1, N_views] - pts_pixel_color = pts_pixel_color[:, 0, :, :] # [N_rays * n_samples, N_views, 3] - pts_pixel_mask = pts_pixel_mask[:, 0, :] # [N_rays*n_samples, N_views] - - # - extract patch - if_patch_blending = False if rays_uv is None else True - pts_patch_color, pts_patch_mask = None, None - if if_patch_blending: - pts_patch_color, pts_patch_mask = self.patch_projector.patch_warp( - pts.reshape([N_rays, n_samples, 3]), - rays_uv, gradients.reshape([N_rays, n_samples, 3]), - color_maps, - intrinsics[0], intrinsics, - query_c2w[0], torch.inverse(w2cs), img_wh=None - ) # (N_rays, n_samples, N_src, Npx, 3), (N_rays, n_samples, N_src, Npx) - N_src, Npx = pts_patch_mask.shape[2:] - pts_patch_color = pts_patch_color.view(N_rays * n_samples, N_src, Npx, 3)[pts_mask_bool] - pts_patch_mask = pts_patch_mask.view(N_rays * n_samples, N_src, Npx)[pts_mask_bool] - - sampled_color_patch = torch.zeros([N_rays * n_samples, Npx, 3]).to(device) - sampled_color_patch_mask = torch.zeros([N_rays * n_samples, 1]).to(device) - - sampled_color_mlp_, sampled_color_mlp_mask_, \ - sampled_color_patch_, sampled_color_patch_mask_ = sdf_network.color_blend( - pts[pts_mask_bool], - position_latent, - gradients[pts_mask_bool], - dirs[pts_mask_bool], - feature_vector[pts_mask_bool], - img_index=img_index, - pts_pixel_color=pts_pixel_color, - pts_pixel_mask=pts_pixel_mask, - pts_patch_color=pts_patch_color, - pts_patch_mask=pts_patch_mask - - ) # [n, 3], [n, 1] - sampled_color_mlp[pts_mask_bool] = sampled_color_mlp_ - sampled_color_mlp_mask[pts_mask_bool] = sampled_color_mlp_mask_.float() - sampled_color_mlp = sampled_color_mlp.view(N_rays, n_samples, 3) - sampled_color_mlp_mask = sampled_color_mlp_mask.view(N_rays, n_samples) - rendering_valid_mask_mlp = torch.mean(pts_mask * sampled_color_mlp_mask, dim=-1, keepdim=True) > 0.5 - - # patch blending - if if_patch_blending: - sampled_color_patch[pts_mask_bool] = sampled_color_patch_ - 
sampled_color_patch_mask[pts_mask_bool] = sampled_color_patch_mask_.float() - sampled_color_patch = sampled_color_patch.view(N_rays, n_samples, Npx, 3) - sampled_color_patch_mask = sampled_color_patch_mask.view(N_rays, n_samples) - rendering_patch_mask = torch.mean(pts_mask * sampled_color_patch_mask, dim=-1, - keepdim=True) > 0.5 # [N_rays, 1] - else: - sampled_color_patch, rendering_patch_mask = None, None - - if if_general_rendering: # used for general training - # [512, 128, 16]; [4, 512, 128, 59]; [4, 512, 128, 4] - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask, _, _ = self.rendering_projector.compute_view_independent( - pts.view(N_rays, n_samples, 3), - # * 3d geometry feature volumes - geometryVolume=conditional_volume[0], - geometryVolumeMask=conditional_valid_mask_volume[0], - sdf_network=sdf_network, - lod=lod, - # * 2d rendering feature maps - rendering_feature_maps=feature_maps, # [n_views, 56, 256, 256] - color_maps=color_maps, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=img_wh, - query_img_idx=0, # the index of the N_views dim for rendering - query_c2w=query_c2w, - ) - - # (N_rays, n_samples, 3) - if if_render_with_grad: - # import ipdb; ipdb.set_trace() - # [nrays, 3] [nrays, 1] - sampled_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - # import ipdb; ipdb.set_trace() - else: - with torch.no_grad(): - sampled_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - else: - sampled_color, rendering_valid_mask = None, None - - inv_variance = self.variance_network(feature_vector)[:, :1].clip(1e-6, 1e6) - - true_dot_val = (dirs * gradients).sum(-1, keepdim=True) # * calculate - - iter_cos = -(F.relu(-true_dot_val * 0.5 + 0.5) * (1.0 - alpha_inter_ratio) + F.relu( - -true_dot_val) * alpha_inter_ratio) # always non-positive - - iter_cos = iter_cos * pts_mask.view(-1, 1) - - true_estimate_sdf_half_next = sdf + iter_cos.clip(-10.0, 10.0) * dists.reshape(-1, 1) * 0.5 - true_estimate_sdf_half_prev = sdf - iter_cos.clip(-10.0, 10.0) * dists.reshape(-1, 1) * 0.5 - - prev_cdf = torch.sigmoid(true_estimate_sdf_half_prev * inv_variance) - next_cdf = torch.sigmoid(true_estimate_sdf_half_next * inv_variance) - - p = prev_cdf - next_cdf - c = prev_cdf - - if self.alpha_type == 'div': - alpha_sdf = ((p + 1e-5) / (c + 1e-5)).reshape(N_rays, n_samples).clip(0.0, 1.0) - elif self.alpha_type == 'uniform': - uniform_estimate_sdf_half_next = sdf - dists.reshape(-1, 1) * 0.5 - uniform_estimate_sdf_half_prev = sdf + dists.reshape(-1, 1) * 0.5 - uniform_prev_cdf = torch.sigmoid(uniform_estimate_sdf_half_prev * inv_variance) - uniform_next_cdf = torch.sigmoid(uniform_estimate_sdf_half_next * inv_variance) - uniform_alpha = F.relu( - (uniform_prev_cdf - uniform_next_cdf + 1e-5) / (uniform_prev_cdf + 1e-5)).reshape( - N_rays, n_samples).clip(0.0, 1.0) - alpha_sdf = uniform_alpha - else: - assert False - - alpha = alpha_sdf - - # - apply pts_mask - alpha = alpha * pts_mask - - # pts_radius = torch.linalg.norm(pts, ord=2, dim=-1, keepdim=True).reshape(N_rays, n_samples) - # inside_sphere = (pts_radius < 1.0).float().detach() - # relax_inside_sphere = (pts_radius < 1.2).float().detach() - inside_sphere = pts_mask - relax_inside_sphere = pts_mask - - weights = alpha * torch.cumprod(torch.cat([torch.ones([N_rays, 1]).to(device), 1. 
- alpha + 1e-7], -1), -1)[:, - :-1] # n_rays, n_samples - weights_sum = weights.sum(dim=-1, keepdim=True) - alpha_sum = alpha.sum(dim=-1, keepdim=True) - - if bg_num > 0: - weights_sum_fg = weights[:, :-bg_num].sum(dim=-1, keepdim=True) - else: - weights_sum_fg = weights_sum - - if sampled_color is not None: - color = (sampled_color * weights[:, :, None]).sum(dim=1) - else: - color = None - # import ipdb; ipdb.set_trace() - - if background_rgb is not None and color is not None: - color = color + background_rgb * (1.0 - weights_sum) - # print("color device:" + str(color.device)) - # if color is not None: - # # import ipdb; ipdb.set_trace() - # color = color + (1.0 - weights_sum) - - - ###################* mlp color rendering ##################### - color_mlp = None - # import ipdb; ipdb.set_trace() - if sampled_color_mlp is not None: - color_mlp = (sampled_color_mlp * weights[:, :, None]).sum(dim=1) - - if background_rgb is not None and color_mlp is not None: - color_mlp = color_mlp + background_rgb * (1.0 - weights_sum) - - ############################ * patch blending ################ - blended_color_patch = None - if sampled_color_patch is not None: - blended_color_patch = (sampled_color_patch * weights[:, :, None, None]).sum(dim=1) # [N_rays, Npx, 3] - - ###################################################### - - gradient_error = (torch.linalg.norm(gradients.reshape(N_rays, n_samples, 3), ord=2, - dim=-1) - 1.0) ** 2 - # ! the gradient normal should be masked out, the pts out of the bounding box should also be penalized - gradient_error = (pts_mask * gradient_error).sum() / ( - (pts_mask).sum() + 1e-5) - - depth = (mid_z_vals * weights[:, :n_samples]).sum(dim=1, keepdim=True) - # print("[TEST]: weights_sum in render_core", weights_sum.mean()) - # print("[TEST]: weights_sum in render_core NAN number", weights_sum.isnan().sum()) - # if weights_sum.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - return { - 'color': color, - 'color_mask': rendering_valid_mask, # (N_rays, 1) - 'color_mlp': color_mlp, - 'color_mlp_mask': rendering_valid_mask_mlp, - 'sdf': sdf, # (N_rays, n_samples) - 'depth': depth, # (N_rays, 1) - 'dists': dists, - 'gradients': gradients.reshape(N_rays, n_samples, 3), - 'variance': 1.0 / inv_variance, - 'mid_z_vals': mid_z_vals, - 'weights': weights, - 'weights_sum': weights_sum, - 'alpha_sum': alpha_sum, - 'alpha_mean': alpha.mean(), - 'cdf': c.reshape(N_rays, n_samples), - 'gradient_error': gradient_error, - 'inside_sphere': inside_sphere, - 'blended_color_patch': blended_color_patch, - 'blended_color_patch_mask': rendering_patch_mask, - 'weights_sum_fg': weights_sum_fg - } - - def render(self, rays_o, rays_d, near, far, sdf_network, rendering_network, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio=0.0, - # * related to conditional feature - lod=None, - conditional_volume=None, - conditional_valid_mask_volume=None, - # * 2d feature maps - feature_maps=None, - color_maps=None, - w2cs=None, - intrinsics=None, - img_wh=None, - query_c2w=None, # -used for testing - if_general_rendering=True, - if_render_with_grad=True, - # * used for blending mlp rendering network - img_index=None, - rays_uv=None, - # * importance sample for second lod network - pre_sample=False, # no use here - # * for clear foreground - bg_ratio=0.0 - ): - device = rays_o.device - N_rays = len(rays_o) - # sample_dist = 2.0 / self.n_samples - sample_dist = ((far - near) / self.n_samples).mean().item() - z_vals = torch.linspace(0.0, 1.0, self.n_samples).to(device) - z_vals = near + 
(far - near) * z_vals[None, :] - - bg_num = int(self.n_samples * bg_ratio) - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - if bg_num > 0: - z_vals_bg = z_vals[:, self.n_samples - bg_num:] - z_vals = z_vals[:, :self.n_samples - bg_num] - - n_samples = self.n_samples - bg_num - perturb = self.perturb - - # - significantly speed up training, for the second lod network - if pre_sample: - z_vals = self.sample_z_vals_from_maskVolume(rays_o, rays_d, near, far, - conditional_valid_mask_volume) - - if perturb_overwrite >= 0: - perturb = perturb_overwrite - if perturb > 0: - # get intervals between samples - mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1]) - upper = torch.cat([mids, z_vals[..., -1:]], -1) - lower = torch.cat([z_vals[..., :1], mids], -1) - # stratified samples in those intervals - t_rand = torch.rand(z_vals.shape).to(device) - z_vals = lower + (upper - lower) * t_rand - - background_alpha = None - background_sampled_color = None - z_val_before = z_vals.clone() - # Up sample - if self.n_importance > 0: - with torch.no_grad(): - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] - - sdf_outputs = sdf_network.sdf( - pts.reshape(-1, 3), conditional_volume, lod=lod) - # pdb.set_trace() - sdf = sdf_outputs['sdf_pts_scale%d' % lod].reshape(N_rays, self.n_samples - bg_num) - - n_steps = 4 - for i in range(n_steps): - new_z_vals = self.up_sample(rays_o, rays_d, z_vals, sdf, self.n_importance // n_steps, - 64 * 2 ** i, - conditional_valid_mask_volume=conditional_valid_mask_volume, - ) - - # if new_z_vals.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - - z_vals, sdf = self.cat_z_vals( - rays_o, rays_d, z_vals, new_z_vals, sdf, lod, - sdf_network, gru_fusion=False, - conditional_volume=conditional_volume, - conditional_valid_mask_volume=conditional_valid_mask_volume, - ) - - del sdf - - n_samples = self.n_samples + self.n_importance - - # Background - ret_outside = None - - # Render - if bg_num > 0: - z_vals = torch.cat([z_vals, z_vals_bg], dim=1) - # if z_vals.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - ret_fine = self.render_core(rays_o, - rays_d, - z_vals, - sample_dist, - lod, - sdf_network, - rendering_network, - background_rgb=background_rgb, - background_alpha=background_alpha, - background_sampled_color=background_sampled_color, - alpha_inter_ratio=alpha_inter_ratio, - # * related to conditional feature - conditional_volume=conditional_volume, - conditional_valid_mask_volume=conditional_valid_mask_volume, - # * 2d feature maps - feature_maps=feature_maps, - color_maps=color_maps, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=img_wh, - query_c2w=query_c2w, - if_general_rendering=if_general_rendering, - if_render_with_grad=if_render_with_grad, - # * used for blending mlp rendering network - img_index=img_index, - rays_uv=rays_uv - ) - - color_fine = ret_fine['color'] - - if self.n_outside > 0: - color_fine_mask = torch.logical_or(ret_fine['color_mask'], ret_outside['color_mask']) - else: - color_fine_mask = ret_fine['color_mask'] - - weights = ret_fine['weights'] - weights_sum = ret_fine['weights_sum'] - - gradients = ret_fine['gradients'] - mid_z_vals = ret_fine['mid_z_vals'] - - # depth = (mid_z_vals * weights[:, :n_samples]).sum(dim=1, keepdim=True) - depth = ret_fine['depth'] - depth_varaince = ((mid_z_vals - depth) ** 2 * weights[:, :n_samples]).sum(dim=-1, keepdim=True) - variance = ret_fine['variance'].reshape(N_rays, n_samples).mean(dim=-1, keepdim=True) - - # - randomly sample points from the volume, and maximize the sdf - 
pts_random = torch.rand([1024, 3]).float().to(device) * 2 - 1 # normalized to (-1, 1) - sdf_random = sdf_network.sdf(pts_random, conditional_volume, lod=lod)['sdf_pts_scale%d' % lod] - - result = { - 'depth': depth, - 'color_fine': color_fine, - 'color_fine_mask': color_fine_mask, - 'color_outside': ret_outside['color'] if ret_outside is not None else None, - 'color_outside_mask': ret_outside['color_mask'] if ret_outside is not None else None, - 'color_mlp': ret_fine['color_mlp'], - 'color_mlp_mask': ret_fine['color_mlp_mask'], - 'variance': variance.mean(), - 'cdf_fine': ret_fine['cdf'], - 'depth_variance': depth_varaince, - 'weights_sum': weights_sum, - 'weights_max': torch.max(weights, dim=-1, keepdim=True)[0], - 'alpha_sum': ret_fine['alpha_sum'].mean(), - 'alpha_mean': ret_fine['alpha_mean'], - 'gradients': gradients, - 'weights': weights, - 'gradient_error_fine': ret_fine['gradient_error'], - 'inside_sphere': ret_fine['inside_sphere'], - 'sdf': ret_fine['sdf'], - 'sdf_random': sdf_random, - 'blended_color_patch': ret_fine['blended_color_patch'], - 'blended_color_patch_mask': ret_fine['blended_color_patch_mask'], - 'weights_sum_fg': ret_fine['weights_sum_fg'] - } - - return result - - @torch.no_grad() - def sample_z_vals_from_sdfVolume(self, rays_o, rays_d, near, far, sdf_volume, mask_volume): - # ? based on sdf to do importance sampling, seems that too biased on pre-estimation - device = rays_o.device - N_rays = len(rays_o) - n_samples = self.n_samples * 2 - - z_vals = torch.linspace(0.0, 1.0, n_samples).to(device) - z_vals = near + (far - near) * z_vals[None, :] - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] - - sdf = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), sdf_volume).reshape([N_rays, n_samples]) - - new_z_vals = self.up_sample(rays_o, rays_d, z_vals, sdf, self.n_samples, - 200, - conditional_valid_mask_volume=mask_volume, - ) - return new_z_vals - - @torch.no_grad() - def sample_z_vals_from_maskVolume(self, rays_o, rays_d, near, far, mask_volume): # don't use - device = rays_o.device - N_rays = len(rays_o) - n_samples = self.n_samples * 2 - - z_vals = torch.linspace(0.0, 1.0, n_samples).to(device) - z_vals = near + (far - near) * z_vals[None, :] - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - mid_z_vals = (z_vals[:, 1:] + z_vals[:, :-1]) * 0.5 - - pts = rays_o[:, None, :] + rays_d[:, None, :] * mid_z_vals[..., :, None] - - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), mask_volume).reshape( - [N_rays, n_samples - 1]) - - # empty voxel set to 0.1, non-empty voxel set to 1 - weights = torch.where(pts_mask > 0, torch.ones_like(pts_mask).to(device), - 0.1 * torch.ones_like(pts_mask).to(device)) - - # sample more pts in non-empty voxels - z_samples = sample_pdf(z_vals, weights, self.n_samples, det=True).detach() - return z_samples - - @torch.no_grad() - def filter_pts_by_depthmaps(self, coords, pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums): - """ - Use the pred_depthmaps to remove redundant pts (pruned by sdf, sdf always have two sides, the back side is useless) - :param coords: [n, 3] int coords - :param pred_depth_maps: [N_views, 1, h, w] - :param proj_matrices: [N_views, 4, 4] - :param partial_vol_origin: [3] - :param voxel_size: 1 - :param near: 1 - :param far: 1 - :param depth_interval: 1 - :param d_plane_nums: 1 - :return: - """ - device = pred_depth_maps.device - 
n_views, _, sizeH, sizeW = pred_depth_maps.shape - - if len(partial_vol_origin.shape) == 1: - partial_vol_origin = partial_vol_origin[None, :] - pts = coords * voxel_size + partial_vol_origin - - rs_grid = pts.unsqueeze(0).expand(n_views, -1, -1) - rs_grid = rs_grid.permute(0, 2, 1).contiguous() # [n_views, 3, n_pts] - nV = rs_grid.shape[-1] - rs_grid = torch.cat([rs_grid, torch.ones([n_views, 1, nV]).to(device)], dim=1) # [n_views, 4, n_pts] - - # Project grid - im_p = proj_matrices @ rs_grid # - transform world pts to image UV space # [n_views, 4, n_pts] - im_x, im_y, im_z = im_p[:, 0], im_p[:, 1], im_p[:, 2] - im_x = im_x / im_z - im_y = im_y / im_z - - im_grid = torch.stack([2 * im_x / (sizeW - 1) - 1, 2 * im_y / (sizeH - 1) - 1], dim=-1) - - im_grid = im_grid.view(n_views, 1, -1, 2) - sampled_depths = torch.nn.functional.grid_sample(pred_depth_maps, im_grid, mode='bilinear', - padding_mode='zeros', - align_corners=True)[:, 0, 0, :] # [n_views, n_pts] - sampled_depths_valid = (sampled_depths > 0.5 * near).float() - valid_d_min = (sampled_depths - d_plane_nums * depth_interval).clamp(near.item(), - far.item()) * sampled_depths_valid - valid_d_max = (sampled_depths + d_plane_nums * depth_interval).clamp(near.item(), - far.item()) * sampled_depths_valid - - mask = im_grid.abs() <= 1 - mask = mask[:, 0] # [n_views, n_pts, 2] - mask = (mask.sum(dim=-1) == 2) & (im_z > valid_d_min) & (im_z < valid_d_max) - - mask = mask.view(n_views, -1) - mask = mask.permute(1, 0).contiguous() # [num_pts, nviews] - - mask_final = torch.sum(mask.float(), dim=1, keepdim=False) > 0 - - return mask_final - - @torch.no_grad() - def get_valid_sparse_coords_by_sdf_depthfilter(self, sdf_volume, coords_volume, mask_volume, feature_volume, - pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums, - threshold=0.02, maximum_pts=110000): - """ - assume batch size == 1, from the first lod to get sparse voxels - :param sdf_volume: [1, X, Y, Z] - :param coords_volume: [3, X, Y, Z] - :param mask_volume: [1, X, Y, Z] - :param feature_volume: [C, X, Y, Z] - :param threshold: - :return: - """ - device = coords_volume.device - _, dX, dY, dZ = coords_volume.shape - - def prune(sdf_pts, coords_pts, mask_volume, threshold): - occupancy_mask = (torch.abs(sdf_pts) < threshold).squeeze(1) # [num_pts] - valid_coords = coords_pts[occupancy_mask] - - # - filter backside surface by depth maps - mask_filtered = self.filter_pts_by_depthmaps(valid_coords, pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums) - valid_coords = valid_coords[mask_filtered] - - # - dilate - occupancy_mask = sparse_to_dense_channel(valid_coords, 1, [dX, dY, dZ], 1, 0, device) # [dX, dY, dZ, 1] - - # - dilate - occupancy_mask = occupancy_mask.float() - occupancy_mask = occupancy_mask.view(1, 1, dX, dY, dZ) - occupancy_mask = F.avg_pool3d(occupancy_mask, kernel_size=7, stride=1, padding=3) - occupancy_mask = occupancy_mask.view(-1, 1) > 0 - - final_mask = torch.logical_and(mask_volume, occupancy_mask)[:, 0] # [num_pts] - - return final_mask, torch.sum(final_mask.float()) - - C, dX, dY, dZ = feature_volume.shape - sdf_volume = sdf_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - coords_volume = coords_volume.permute(1, 2, 3, 0).contiguous().view(-1, 3) - mask_volume = mask_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - feature_volume = feature_volume.permute(1, 2, 3, 0).contiguous().view(-1, C) - - # - for check - # sdf_volume = 
torch.rand_like(sdf_volume).float().to(sdf_volume.device) * 0.02 - - final_mask, valid_num = prune(sdf_volume, coords_volume, mask_volume, threshold) - - while (valid_num > maximum_pts) and (threshold > 0.003): - threshold = threshold - 0.002 - final_mask, valid_num = prune(sdf_volume, coords_volume, mask_volume, threshold) - - valid_coords = coords_volume[final_mask] # [N, 3] - valid_feature = feature_volume[final_mask] # [N, C] - - valid_coords = torch.cat([torch.ones([valid_coords.shape[0], 1]).to(valid_coords.device) * 0, - valid_coords], dim=1) # [N, 4], append batch idx - - # ! if the valid_num is still larger than maximum_pts, sample part of pts - if valid_num > maximum_pts: - valid_num = valid_num.long() - occupancy = torch.ones([valid_num]).to(device) > 0 - choice = np.random.choice(valid_num.cpu().numpy(), valid_num.cpu().numpy() - maximum_pts, - replace=False) - ind = torch.nonzero(occupancy).to(device) - occupancy[ind[choice]] = False - valid_coords = valid_coords[occupancy] - valid_feature = valid_feature[occupancy] - - print(threshold, "randomly sample to save memory") - - return valid_coords, valid_feature - - @torch.no_grad() - def get_valid_sparse_coords_by_sdf(self, sdf_volume, coords_volume, mask_volume, feature_volume, threshold=0.02, - maximum_pts=110000): - """ - assume batch size == 1, from the first lod to get sparse voxels - :param sdf_volume: [num_pts, 1] - :param coords_volume: [3, X, Y, Z] - :param mask_volume: [1, X, Y, Z] - :param feature_volume: [C, X, Y, Z] - :param threshold: - :return: - """ - - def prune(sdf_volume, mask_volume, threshold): - occupancy_mask = torch.abs(sdf_volume) < threshold # [num_pts, 1] - - # - dilate - occupancy_mask = occupancy_mask.float() - occupancy_mask = occupancy_mask.view(1, 1, dX, dY, dZ) - occupancy_mask = F.avg_pool3d(occupancy_mask, kernel_size=7, stride=1, padding=3) - occupancy_mask = occupancy_mask.view(-1, 1) > 0 - - final_mask = torch.logical_and(mask_volume, occupancy_mask)[:, 0] # [num_pts] - - return final_mask, torch.sum(final_mask.float()) - - C, dX, dY, dZ = feature_volume.shape - coords_volume = coords_volume.permute(1, 2, 3, 0).contiguous().view(-1, 3) - mask_volume = mask_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - feature_volume = feature_volume.permute(1, 2, 3, 0).contiguous().view(-1, C) - - final_mask, valid_num = prune(sdf_volume, mask_volume, threshold) - - while (valid_num > maximum_pts) and (threshold > 0.003): - threshold = threshold - 0.002 - final_mask, valid_num = prune(sdf_volume, mask_volume, threshold) - - valid_coords = coords_volume[final_mask] # [N, 3] - valid_feature = feature_volume[final_mask] # [N, C] - - valid_coords = torch.cat([torch.ones([valid_coords.shape[0], 1]).to(valid_coords.device) * 0, - valid_coords], dim=1) # [N, 4], append batch idx - - # ! 
if the valid_num is still larger than maximum_pts, sample part of pts - if valid_num > maximum_pts: - device = sdf_volume.device - valid_num = valid_num.long() - occupancy = torch.ones([valid_num]).to(device) > 0 - choice = np.random.choice(valid_num.cpu().numpy(), valid_num.cpu().numpy() - maximum_pts, - replace=False) - ind = torch.nonzero(occupancy).to(device) - occupancy[ind[choice]] = False - valid_coords = valid_coords[occupancy] - valid_feature = valid_feature[occupancy] - - print(threshold, "randomly sample to save memory") - - return valid_coords, valid_feature - - @torch.no_grad() - def extract_fields(self, bound_min, bound_max, resolution, query_func, device, - # * related to conditional feature - **kwargs - ): - N = 64 - X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N) - Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N) - Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N) - - u = np.zeros([resolution, resolution, resolution], dtype=np.float32) - with torch.no_grad(): - for xi, xs in enumerate(X): - for yi, ys in enumerate(Y): - for zi, zs in enumerate(Z): - xx, yy, zz = torch.meshgrid(xs, ys, zs) - pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1).to(device) - - # ! attention, the query function is different for extract geometry and fields - output = query_func(pts, **kwargs) - sdf = output['sdf_pts_scale%d' % kwargs['lod']].reshape(len(xs), len(ys), - len(zs)).detach().cpu().numpy() - - u[xi * N: xi * N + len(xs), yi * N: yi * N + len(ys), zi * N: zi * N + len(zs)] = -1 * sdf - return u - - @torch.no_grad() - def extract_geometry(self, sdf_network, bound_min, bound_max, resolution, threshold, device, occupancy_mask=None, - # * 3d feature volume - **kwargs - ): - # logging.info('threshold: {}'.format(threshold)) - - u = self.extract_fields(bound_min, bound_max, resolution, - lambda pts, **kwargs: sdf_network.sdf(pts, **kwargs), - # - sdf need to be multiplied by -1 - device, - # * 3d feature volume - **kwargs - ) - if occupancy_mask is not None: - dX, dY, dZ = occupancy_mask.shape - empty_mask = 1 - occupancy_mask - empty_mask = empty_mask.view(1, 1, dX, dY, dZ) - # - dilation - # empty_mask = F.avg_pool3d(empty_mask, kernel_size=7, stride=1, padding=3) - empty_mask = F.interpolate(empty_mask, [resolution, resolution, resolution], mode='nearest') - empty_mask = empty_mask.view(resolution, resolution, resolution).cpu().numpy() > 0 - u[empty_mask] = -100 - del empty_mask - - vertices, triangles = mcubes.marching_cubes(u, threshold) - b_max_np = bound_max.detach().cpu().numpy() - b_min_np = bound_min.detach().cpu().numpy() - - vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :] - return vertices, triangles, u - - @torch.no_grad() - def extract_depth_maps(self, sdf_network, con_volume, intrinsics, c2ws, H, W, near, far): - """ - extract depth maps from the density volume - :param con_volume: [1, 1+C, dX, dY, dZ] can by con_volume or sdf_volume - :param c2ws: [B, 4, 4] - :param H: - :param W: - :param near: - :param far: - :return: - """ - device = con_volume.device - batch_size = intrinsics.shape[0] - - with torch.no_grad(): - ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' - p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 - - intrinsics_inv = torch.inverse(intrinsics) - - p = p.view(-1, 3).float().to(device) # N_rays, 3 - p = torch.matmul(intrinsics_inv[:, None, 
:3, :3], p[:, :, None]).squeeze() # Batch, N_rays, 3 - rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # Batch, N_rays, 3 - rays_v = torch.matmul(c2ws[:, None, :3, :3], rays_v[:, :, :, None]).squeeze() # Batch, N_rays, 3 - rays_o = c2ws[:, None, :3, 3].expand(rays_v.shape) # Batch, N_rays, 3 - rays_d = rays_v - - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - - ################## - sphere tracer to extract depth maps ###################### - depth_masks_sphere, depth_maps_sphere = self.ray_tracer.extract_depth_maps( - rays_o, rays_d, - near[None, :].repeat(rays_o.shape[0], 1), - far[None, :].repeat(rays_o.shape[0], 1), - sdf_network, con_volume - ) - - depth_maps = depth_maps_sphere.view(batch_size, 1, H, W) - depth_masks = depth_masks_sphere.view(batch_size, 1, H, W) - - depth_maps = torch.where(depth_masks, depth_maps, - torch.zeros_like(depth_masks.float()).to(device)) # fill invalid pixels by 0 - - return depth_maps, depth_masks diff --git a/SparseNeuS_demo_v1/models/trainer_finetune.py b/SparseNeuS_demo_v1/models/trainer_finetune.py deleted file mode 100644 index e6203976b2a72dea61e1e728a3b1a225366f56a2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/trainer_finetune.py +++ /dev/null @@ -1,979 +0,0 @@ -""" -Trainer for fine-tuning -""" -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F - -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic -from models.render_utils import sample_pdf -from utils.misc_utils import visualize_depth_numpy - -from utils.training_utils import tocuda, numpy2tensor -from loss.depth_metric import compute_depth_errors -from loss.color_loss import OcclusionColorLoss, OcclusionColorPatchLoss -from loss.depth_loss import DepthLoss, DepthSmoothLoss - -from models.projector import Projector - -from models.rays import gen_rays_between - -from models.sparse_neus_renderer import SparseNeuSRenderer - -import pdb - - -class FinetuneTrainer(nn.Module): - """ - Trainer used for fine-tuning - """ - - def __init__(self, - rendering_network_outside, - pyramid_feature_network_lod0, - pyramid_feature_network_lod1, - sdf_network_lod0, - sdf_network_lod1, - variance_network_lod0, - variance_network_lod1, - sdf_network_finetune, - finetune_lod, # which lod fine-tuning use - n_samples, - n_importance, - n_outside, - perturb, - alpha_type='div', - conf=None - ): - super(FinetuneTrainer, self).__init__() - - self.conf = conf - self.base_exp_dir = conf['general.base_exp_dir'] - - self.finetune_lod = finetune_lod - - self.anneal_start = self.conf.get_float('train.anneal_start', default=0.0) - self.anneal_end = self.conf.get_float('train.anneal_end', default=0.0) - self.end_iter = self.conf.get_int('train.end_iter') - - # network setups - self.rendering_network_outside = rendering_network_outside - self.pyramid_feature_network_geometry_lod0 = pyramid_feature_network_lod0 # 2D pyramid feature network for geometry - self.pyramid_feature_network_geometry_lod1 = pyramid_feature_network_lod1 # use differnet networks for the two lods - - self.sdf_network_lod0 = sdf_network_lod0 # the first lod is density_network - self.sdf_network_lod1 = sdf_network_lod1 - - # - warpped by ModuleList to support DataParallel - self.variance_network_lod0 = variance_network_lod0 - self.variance_network_lod1 = variance_network_lod1 - self.variance_network_finetune = variance_network_lod0 if self.finetune_lod == 0 else variance_network_lod1 - - 
self.sdf_network_finetune = sdf_network_finetune - - self.n_samples = n_samples - self.n_importance = n_importance - self.n_outside = n_outside - self.perturb = perturb - self.alpha_type = alpha_type - - self.sdf_renderer_finetune = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_finetune, - self.variance_network_finetune, - None, # rendering_network - self.n_samples, - self.n_importance, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - # sdf network weights - self.sdf_igr_weight = self.conf.get_float('train.sdf_igr_weight') - self.sdf_sparse_weight = self.conf.get_float('train.sdf_sparse_weight', default=0) - - self.sdf_decay_param = self.conf.get_float('train.sdf_decay_param', default=100) - self.color_pixel_weight = self.conf.get_float('train.color_pixel_weight', default=1.0) - self.color_patch_weight = self.conf.get_float('train.color_patch_weight', default=0.) - self.tv_weight = self.conf.get_float('train.tv_weight', default=0.001) # no use - self.visibility_beta = self.conf.get_float('train.visibility_beta', default=0.025) - self.visibility_gama = self.conf.get_float('train.visibility_gama', default=0.015) - self.visibility_penalize_ratio = self.conf.get_float('train.visibility_penalize_ratio', default=0.8) - self.visibility_weight_thred = self.conf.get_list('train.visibility_weight_thred', default=[0.7]) - self.if_visibility_aware = self.conf.get_bool('train.if_visibility_aware', default=True) - self.train_from_scratch = self.conf.get_bool('train.train_from_scratch', default=False) - - self.depth_criterion = DepthLoss() - self.depth_smooth_criterion = DepthSmoothLoss() - self.occlusion_color_criterion = OcclusionColorLoss(beta=self.visibility_beta, - gama=self.visibility_gama, - weight_thred=self.visibility_weight_thred, - occlusion_aware=self.if_visibility_aware) - self.occlusion_color_patch_criterion = OcclusionColorPatchLoss( - type=self.conf.get_string('train.patch_loss_type', default='ncc'), - h_patch_size=self.conf.get_int('model.h_patch_size', default=5), - beta=self.visibility_beta, gama=self.visibility_gama, - weight_thred=self.visibility_weight_thred, - occlusion_aware=self.if_visibility_aware - ) - - # self.iter_step = 0 - self.val_mesh_freq = self.conf.get_int('train.val_mesh_freq') - - # - True if fine-tuning - self.if_fitted_rendering = self.conf.get_bool('train.if_fitted_rendering', default=False) - - def get_trainable_params(self): - # set trainable params - - params = [] - faster_params = [] - slower_params = [] - - params += self.variance_network_finetune.parameters() - slower_params += self.sdf_network_finetune.sparse_volume_lod0.parameters() - params += self.sdf_network_finetune.sdf_layer.parameters() - - faster_params += self.sdf_network_finetune.renderer.parameters() - - self.params_to_train = { - 'slower_params': slower_params, - 'params': params, - 'faster_params': faster_params - } - - return self.params_to_train - - @torch.no_grad() - def prepare_con_volume(self, sample): - # * only support batch_size==1 - sizeW = sample['img_wh'][0] - sizeH = sample['img_wh'][1] - partial_vol_origin = sample['partial_vol_origin'][None, :] # [B, 3] - near, far = sample['near_fars'][0, :1], sample['near_fars'][0, 1:] - near = 0.8 * near - far = 1.2 * far - - imgs = sample['images'] - intrinsics = sample['intrinsics'] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'] - c2ws = sample['c2ws'] - proj_matrices = sample['affine_mats'][None, :, :, :] - - # *********************** 
Lod==0 *********************** - - with torch.no_grad(): - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs) - # import ipdb; ipdb.set_trace() - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.finetune_lod == 0: - return con_volume_lod0, con_valid_mask_volume_lod0, coords_lod0 - - # * extract depth maps for all the images for adaptive rendering_network - depth_maps_lod0, depth_masks_lod0 = None, None - if self.finetune_lod == 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - if self.finetune_lod == 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - pre_coords, pre_feats = self.sdf_renderer_finetune.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - maximum_pts=200000) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - coords_lod1 = conditional_features_lod1['coords_scale1'] # [1,3,wX,wY,wZ] - con_valid_mask_volume_lod0 = F.interpolate(con_valid_mask_volume_lod0, scale_factor=2) - - return con_volume_lod1, con_valid_mask_volume_lod1, coords_lod1 - - def initialize_finetune_network(self, sample, sparse_con_volume=None, sparse_coords_volume=None, - train_from_scratch=False): - - if not train_from_scratch: - if sparse_con_volume is None: # if the - - con_volume, con_mask_volume, _ = self.prepare_con_volume(sample) - - device = con_volume.device - - self.sdf_network_finetune.initialize_conditional_volumes( - con_volume, - con_mask_volume - ) - else: - self.sdf_network_finetune.initialize_conditional_volumes( - None, - None, - sparse_con_volume, - sparse_coords_volume - ) - else: - device = sample['images'].device - vol_dims = self.sdf_network_finetune.vol_dims - con_volume = torch.zeros( - [1, self.sdf_network_finetune.regnet_d_out, vol_dims[0], vol_dims[1], vol_dims[2]]).to(device) - con_mask_volume = torch.ones([1, 1, vol_dims[0], vol_dims[1], vol_dims[2]]).to(device) - self.sdf_network_finetune.initialize_conditional_volumes( - con_volume, - con_mask_volume - ) - - self.sdf_network_lod0, self.sdf_network_lod1 = None, None - self.pyramid_feature_network_geometry_lod0, self.pyramid_feature_network_geometry_lod1 = None, None - - def train_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - - # * finetune on one specific scene - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - img_index = sample['img_index'][0] # [n] - - # the full-size ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - w2cs = sample['w2cs'][0] - proj_matrices = sample['affine_mats'] - scale_mat = sample['scale_mat'] - trans_mat = sample['trans_mat'] - - query_c2w = sample['query_c2w'] - - # *********************** Lod==0 *********************** - - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - # # - extract mesh - if iter_step % self.val_mesh_freq == 0: - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, - lod=0, - threshold=0., - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='ft', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - render_out = self.sdf_renderer_finetune.render( - rays_o, rays_d, near, far, - self.sdf_network_finetune, - None, # rendering_network - background_rgb=background_rgb, - alpha_inter_ratio=1.0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - img_index=img_index, - rays_uv=rays_ndc_uv if self.color_patch_weight > 0 else None, - ) - - # * optional TV regularizer, we don't use in this paper - if self.tv_weight > 0: - tv = self.sdf_network_finetune.tv_regularizer() - else: - tv = 0.0 - render_out['tv'] = tv - loss_lod0, losses_lod0, depth_statis_lod0 = self.cal_losses_sdf(render_out, sample_rays, iter_step) - - losses = { - # - lod 0 - 'loss_lod0': loss_lod0, - 'losses_lod0': losses_lod0, - 'depth_statis_lod0': depth_statis_lod0, - } - - return losses - - def val_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=True, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - img_index = sample['img_index'][0] # [n] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_mlp = [] - - if save_vis: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # ****** lod 0 **** - render_out = self.sdf_renderer_finetune.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_finetune, - None, - background_rgb=background_rgb, - alpha_inter_ratio=1., - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - if_render_with_grad=False, - img_index=img_index, - # rays_uv=rays_ndc_uv - ) - - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - - if feasible('color_mlp'): - out_rgb_mlp.append(render_out['color_mlp'].detach().cpu().numpy()) - - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - 
out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - # - save visualization of lod 0 - - self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - query_w2c[0], out_rgb_fine, H, W, - depth_min, depth_max, iter_step, meta, "val_lod0", - out_color_mlp=out_rgb_mlp, true_depth=true_depth) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - def export_mesh_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=True, - ): - # * only support batch_size==1 - # ! attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - # meta = sample['meta'][batch_idx] # the scan lighting ref_view info - meta='' - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # import ipdb; ipdb.set_trace() - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - - # - extract mesh - - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - def save_visualization(self, true_img, true_colored_depth, out_depth, out_normal, w2cs, out_color, H, W, - depth_min, depth_max, iter_step, meta, comment, out_color_mlp=[], true_depth=None): - if len(out_color) > 0: - img_fine = (np.concatenate(out_color, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_color_mlp) > 0: - img_mlp = (np.concatenate(out_color_mlp, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_normal) > 0: - normal_img = np.concatenate(out_normal, axis=0) - rot = w2cs[:3, :3].detach().cpu().numpy() - # - convert normal from world space to camera space - normal_img = (np.matmul(rot[None, :, :], - normal_img[:, :, None]).reshape([H, W, 3]) * 128 + 128).clip(0, 255) - if len(out_depth) > 0: - pred_depth = np.concatenate(out_depth, 
axis=0).reshape([H, W]) - pred_depth_colored = visualize_depth_numpy(pred_depth, [depth_min, depth_max])[0] - - if len(out_depth) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'depths_' + comment), exist_ok=True) - if true_colored_depth is not None: - - if true_depth is not None: - depth_error_map = np.abs(true_depth - pred_depth) * 5.0 - depth_visualized = np.concatenate( - [depth_error_map, true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] - else: - depth_visualized = np.concatenate( - [true_colored_depth, pred_depth_colored, true_img])[:, :, ::-1] - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), depth_visualized - ) - else: - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [pred_depth_colored, true_img])[:, :, ::-1]) - if len(out_color) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_fine, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_color_mlp) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_mlp, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_normal) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'normals_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'normals_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - normal_img[:, :, ::-1]) - - def forward(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - mode='train', - save_vis=False, - ): - - if mode == 'train': - return self.train_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, - ) - elif mode == 'val': - return self.val_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, save_vis=save_vis, - ) - elif mode == 'export_mesh': - return self.export_mesh_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, save_vis=save_vis, - ) - - def obtain_pyramid_feature_maps(self, imgs, lod=0): - """ - get feature maps of all conditional images - :param imgs: - :return: - """ - - if lod == 0: - extractor = self.pyramid_feature_network_geometry_lod0 - elif lod >= 1: - extractor = self.pyramid_feature_network_geometry_lod1 - - pyramid_feature_maps = extractor(imgs) - - # * the pyramid features are very important, if only use the coarst features, hard to optimize - fused_feature_maps = torch.cat([ - F.interpolate(pyramid_feature_maps[0], scale_factor=4, mode='bilinear', align_corners=True), - F.interpolate(pyramid_feature_maps[1], scale_factor=2, mode='bilinear', align_corners=True), - pyramid_feature_maps[2] - ], dim=1) - - return fused_feature_maps - - def cal_losses_sdf(self, render_out, sample_rays, iter_step=-1): - - def get_weight(iter_step, weight): - if iter_step < 0: - return weight - - if self.anneal_end == 0.0: - return weight - elif iter_step < self.anneal_start: - return 0.0 - else: - return np.min( - [1.0, - (iter_step - self.anneal_start) / (self.anneal_end * 2 - self.anneal_start)]) * weight - - rays_o = sample_rays['rays_o'][0] - 
rays_d = sample_rays['rays_v'][0] - true_rgb = sample_rays['rays_color'][0] - - if 'rays_depth' in sample_rays.keys(): - true_depth = sample_rays['rays_depth'][0] - else: - true_depth = None - mask = sample_rays['rays_mask'][0] - - color_fine = render_out['color_fine'] - color_fine_mask = render_out['color_fine_mask'] - depth_pred = render_out['depth'] - - variance = render_out['variance'] - cdf_fine = render_out['cdf_fine'] - weight_sum = render_out['weights_sum'] - - if self.train_from_scratch: - occlusion_aware = False if iter_step < 5000 else True - else: - occlusion_aware = True - - gradient_error_fine = render_out['gradient_error_fine'] - - sdf = render_out['sdf'] - - # * color generated by mlp - color_mlp = render_out['color_mlp'] - color_mlp_mask = render_out['color_mlp_mask'] - - if color_mlp is not None: - # Color loss - color_mlp_mask = color_mlp_mask[..., 0] - - color_mlp_loss, color_mlp_error = self.occlusion_color_criterion(pred=color_mlp, gt=true_rgb, - weight=weight_sum.squeeze(), - mask=color_mlp_mask, - occlusion_aware=occlusion_aware) - - psnr_mlp = 20.0 * torch.log10( - 1.0 / (((color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_mlp_loss = 0. - psnr_mlp = 0. - - # - blended patch loss - blended_color_patch = render_out['blended_color_patch'] # [N_pts, Npx, 3] - blended_color_patch_mask = render_out['blended_color_patch_mask'] # [N_pts, 1] - color_patch_loss = 0.0 - color_patch_error = 0.0 - visibility_beta = 0.0 - if blended_color_patch is not None: - rays_patch_color = sample_rays['rays_patch_color'][0] - rays_patch_mask = sample_rays['rays_patch_mask'][0] - patch_mask = (rays_patch_mask * blended_color_patch_mask).float()[:, 0] > 0 # [N_pts] - - color_patch_loss, color_patch_error, visibility_beta = self.occlusion_color_patch_criterion( - blended_color_patch, - rays_patch_color, - weight=weight_sum.squeeze(), - mask=patch_mask, - penalize_ratio=self.visibility_penalize_ratio, - occlusion_aware=occlusion_aware - ) - - if true_depth is not None: - depth_loss = self.depth_criterion(depth_pred, true_depth, mask) - - # depth evaluation - depth_statis = compute_depth_errors(depth_pred.detach().cpu().numpy(), true_depth.cpu().numpy(), - mask.cpu().numpy() > 0) - depth_statis = numpy2tensor(depth_statis, device=rays_o.device) - else: - depth_loss = 0. - depth_statis = None - - # - if without sparse_loss, the mean sdf is 0.02. 
- # - use sparse_loss to prevent occluded pts have 0 sdf - sparse_loss_1 = torch.exp(-1 * torch.abs(render_out['sdf_random']) * self.sdf_decay_param * 10).mean() - sparse_loss_2 = torch.exp(-1 * torch.abs(sdf) * self.sdf_decay_param).mean() - sparse_loss = (sparse_loss_1 + sparse_loss_2) / 2 - - sdf_mean = torch.abs(sdf).mean() - sparseness_1 = (torch.abs(sdf) < 0.01).to(torch.float32).mean() - sparseness_2 = (torch.abs(sdf) < 0.02).to(torch.float32).mean() - - # Eikonal loss - gradient_error_loss = gradient_error_fine - - # * optional TV regularizer - if 'tv' in render_out.keys(): - tv = render_out['tv'] - else: - tv = 0.0 - - loss = color_mlp_loss + \ - color_patch_loss * self.color_patch_weight + \ - sparse_loss * get_weight(iter_step, self.sdf_sparse_weight) + \ - gradient_error_loss * self.sdf_igr_weight - - losses = { - "loss": loss, - "depth_loss": depth_loss, - "color_mlp_loss": color_mlp_error, - "gradient_error_loss": gradient_error_loss, - "sparse_loss": sparse_loss, - "sparseness_1": sparseness_1, - "sparseness_2": sparseness_2, - "sdf_mean": sdf_mean, - "psnr_mlp": psnr_mlp, - "weights_sum": render_out['weights_sum'], - "alpha_sum": render_out['alpha_sum'], - "variance": render_out['variance'], - "sparse_weight": get_weight(iter_step, self.sdf_sparse_weight), - 'color_patch_loss': color_patch_error, - 'visibility_beta': visibility_beta, - 'tv': tv, - } - - losses = numpy2tensor(losses, device=rays_o.device) - - return loss, losses, depth_statis - - def validate_mesh(self, sdf_network, func_extract_geometry, world_space=True, resolution=256, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - # occupancy_mask=occupancy_mask - ) - - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - - mesh = trimesh.Trimesh(vertices, triangles) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) - - def gen_video(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=1024, - ): - # * only support batch_size==1 - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] * 0.8 - - img_index = sample['img_index'][0] # [n] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = 
sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - rendering_c2ws = sample['rendering_c2ws'][0] # [n, 4, 4] - rendering_imgs_idx = sample['rendering_imgs_idx'][0] - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - inter_views_num = 60 - resolution_level = 2 - for r_idx in range(rendering_c2ws.shape[0] - 1): - for idx in range(inter_views_num): - query_c2w, rays_o, rays_d = gen_rays_between( - rendering_c2ws[r_idx], rendering_c2ws[r_idx + 1], intrinsics[0], - np.sin(((idx / 60.0) - 0.5) * np.pi) * 0.5 + 0.5, - H, W, resolution_level=resolution_level) - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - # ****** lod 0 **** - render_out = self.sdf_renderer_finetune.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_finetune, - None, - background_rgb=background_rgb, - alpha_inter_ratio=1., - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - if_render_with_grad=False, - img_index=img_index, - # rays_uv=rays_ndc_uv - ) - # pdb.set_trace() - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_mlp'): - out_rgb_fine.append(render_out['color_mlp'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - img_fine = (np.concatenate(out_rgb_fine, axis=0).reshape( - [H // resolution_level, W // 
resolution_level, 3, -1]) * 256).clip(0, 255) - save_dir = os.path.join(self.base_exp_dir, 'render_{}_{}'.format(rendering_imgs_idx[r_idx], - rendering_imgs_idx[r_idx + 1])) - os.makedirs(save_dir, exist_ok=True) - # ic(img_fine.shape) - print(cv.imwrite( - os.path.join(save_dir, '{}.png'.format(idx + r_idx * inter_views_num)), - img_fine.squeeze()[:, :, ::-1])) - print(os.path.join(save_dir, '{}.png'.format(idx + r_idx * inter_views_num))) diff --git a/SparseNeuS_demo_v1/models/trainer_generic.py b/SparseNeuS_demo_v1/models/trainer_generic.py index 5c87d61d5c7feb93dadd40099a5ebe0a9db81924..786ccfd0f84f45ec395db8831b78cecbda803139 100644 --- a/SparseNeuS_demo_v1/models/trainer_generic.py +++ b/SparseNeuS_demo_v1/models/trainer_generic.py @@ -8,26 +8,17 @@ import torch.nn as nn import torch.nn.functional as F import numpy as np -import logging -import mcubes import trimesh from icecream import ic from utils.misc_utils import visualize_depth_numpy -from utils.training_utils import numpy2tensor from loss.depth_metric import compute_depth_errors from loss.depth_loss import DepthLoss, DepthSmoothLoss -from models.rays import gen_rays_between - from models.sparse_neus_renderer import SparseNeuSRenderer -def safe_l2_normalize(x, dim=None, eps=1e-6): - return F.normalize(x, p=2, dim=dim, eps=eps) - - class GenericTrainer(nn.Module): def __init__(self, rendering_network_outside, @@ -223,7 +214,6 @@ class GenericTrainer(nn.Module): con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # import ipdb; ipdb.set_trace() coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] # * extract depth maps for all the images @@ -347,8 +337,6 @@ class GenericTrainer(nn.Module): mode='train_bg', meta=meta, iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - # import ipdb; ipdb.set_trace() - # print("Checker3.1:, after val mesh") losses = { # - lod 0 'loss_lod0': loss_lod0, @@ -427,7 +415,6 @@ class GenericTrainer(nn.Module): with torch.no_grad(): # - obtain conditional features geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # import ipdb; ipdb.set_trace() # - lod 0 conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( feature_maps=geometry_feature_maps[None, :, :, :, :], @@ -836,7 +823,6 @@ class GenericTrainer(nn.Module): print("meta: ", meta) print("scale_factor: ", scale_factor) print("depth_error_mean: ", depth_error_map.mean()) - # import ipdb; ipdb.set_trace() depth_visualized = np.concatenate( [(depth_error_map * 255).astype(np.uint8), true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] # print("depth_visualized.shape: ", depth_visualized.shape) @@ -1003,12 +989,10 @@ class GenericTrainer(nn.Module): if color_fine is not None: # Color loss color_mask = color_fine_mask if color_fine_mask is not None else mask - # import ipdb; ipdb.set_trace() color_mask = color_mask[..., 0] color_error = (color_fine[color_mask] - true_rgb[color_mask]) # print("Nan number", torch.isnan(color_error).sum()) # print("Color error shape", color_error.shape) - # import ipdb; ipdb.set_trace() color_fine_loss = F.l1_loss(color_error, torch.zeros_like(color_error).to(color_error.device), reduction='mean') # print(color_fine_loss) @@ -1100,8 +1084,7 @@ class GenericTrainer(nn.Module): "fg_bg_weight": fg_bg_weight, "fg_bg_loss": fg_bg_loss, # added by jha, bug of sparseNeuS } - # print("[TEST]: weights_sum in trainner forward", 
losses['weights_sum'].mean()) - losses = numpy2tensor(losses, device=rays_o.device) + losses = torch.tensor(losses, device=rays_o.device) return loss, losses, depth_statis @torch.no_grad() @@ -1213,7 +1196,7 @@ class GenericTrainer(nn.Module): trans_mat_np = trans_mat.cpu().numpy() vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - # import ipdb; ipdb.set_trace() + vertices_color = np.array(vertices_color.squeeze(0).cpu() * 255, dtype=np.uint8) mesh = trimesh.Trimesh(vertices, triangles, vertex_colors=vertices_color) os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod)), exist_ok=True) diff --git a/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py b/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py deleted file mode 100644 index 8a75f2c7fcaf613e1a4c5deeb9a8be15abd96d8d..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py +++ /dev/null @@ -1,1313 +0,0 @@ -""" -decouple the trainer with the renderer -""" -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F - -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic - -from utils.misc_utils import visualize_depth_numpy - -from utils.training_utils import numpy2tensor -from loss.depth_metric import compute_depth_errors - -from loss.depth_loss import DepthLoss, DepthSmoothLoss - -from models.rays import gen_rays_between - -from models.sparse_neus_renderer_normals_new import SparseNeuSRenderer - -def safe_l2_normalize(x, dim=None, eps=1e-6): - return F.normalize(x, p=2, dim=dim, eps=eps) - - -class GenericTrainer(nn.Module): - def __init__(self, - rendering_network_outside, - pyramid_feature_network_lod0, - pyramid_feature_network_lod1, - sdf_network_lod0, - sdf_network_lod1, - variance_network_lod0, - variance_network_lod1, - rendering_network_lod0, - rendering_network_lod1, - n_samples_lod0, - n_importance_lod0, - n_samples_lod1, - n_importance_lod1, - n_outside, - perturb, - alpha_type='div', - conf=None, - timestamp="", - mode='train', - base_exp_dir=None, - ): - super(GenericTrainer, self).__init__() - - self.conf = conf - self.timestamp = timestamp - - - self.base_exp_dir = base_exp_dir - - - self.anneal_start = self.conf.get_float('train.anneal_start', default=0.0) - self.anneal_end = self.conf.get_float('train.anneal_end', default=0.0) - self.anneal_start_lod1 = self.conf.get_float('train.anneal_start_lod1', default=0.0) - self.anneal_end_lod1 = self.conf.get_float('train.anneal_end_lod1', default=0.0) - - # network setups - self.rendering_network_outside = rendering_network_outside - self.pyramid_feature_network_geometry_lod0 = pyramid_feature_network_lod0 # 2D pyramid feature network for geometry - self.pyramid_feature_network_geometry_lod1 = pyramid_feature_network_lod1 # use differnet networks for the two lods - - # when num_lods==2, may consume too much memeory - self.sdf_network_lod0 = sdf_network_lod0 - self.sdf_network_lod1 = sdf_network_lod1 - - # - warpped by ModuleList to support DataParallel - self.variance_network_lod0 = variance_network_lod0 - self.variance_network_lod1 = variance_network_lod1 - - self.rendering_network_lod0 = rendering_network_lod0 - self.rendering_network_lod1 = rendering_network_lod1 - - self.n_samples_lod0 = n_samples_lod0 - self.n_importance_lod0 = n_importance_lod0 - self.n_samples_lod1 = n_samples_lod1 - 
self.n_importance_lod1 = n_importance_lod1 - self.n_outside = n_outside - self.num_lods = conf.get_int('model.num_lods') # the number of octree lods - self.perturb = perturb - self.alpha_type = alpha_type - - # - the two renderers - self.sdf_renderer_lod0 = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_lod0, - self.variance_network_lod0, - self.rendering_network_lod0, - self.n_samples_lod0, - self.n_importance_lod0, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - self.sdf_renderer_lod1 = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_lod1, - self.variance_network_lod1, - self.rendering_network_lod1, - self.n_samples_lod1, - self.n_importance_lod1, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - self.if_fix_lod0_networks = self.conf.get_bool('train.if_fix_lod0_networks') - - # sdf network weights - self.sdf_igr_weight = self.conf.get_float('train.sdf_igr_weight') - self.sdf_sparse_weight = self.conf.get_float('train.sdf_sparse_weight', default=0) - self.sdf_decay_param = self.conf.get_float('train.sdf_decay_param', default=100) - self.fg_bg_weight = self.conf.get_float('train.fg_bg_weight', default=0.00) - self.bg_ratio = self.conf.get_float('train.bg_ratio', default=0.0) - - self.depth_criterion = DepthLoss() - - # - DataParallel mode, cannot modify attributes in forward() - # self.iter_step = 0 - self.val_mesh_freq = self.conf.get_int('train.val_mesh_freq') - - # - True for finetuning; False for general training - self.if_fitted_rendering = self.conf.get_bool('train.if_fitted_rendering', default=False) - - self.prune_depth_filter = self.conf.get_bool('model.prune_depth_filter', default=False) - - def get_trainable_params(self): - # set trainable params - - self.params_to_train = [] - - if not self.if_fix_lod0_networks: - # load pretrained featurenet - self.params_to_train += list(self.pyramid_feature_network_geometry_lod0.parameters()) - self.params_to_train += list(self.sdf_network_lod0.parameters()) - self.params_to_train += list(self.variance_network_lod0.parameters()) - - if self.rendering_network_lod0 is not None: - self.params_to_train += list(self.rendering_network_lod0.parameters()) - - if self.sdf_network_lod1 is not None: - # load pretrained featurenet - self.params_to_train += list(self.pyramid_feature_network_geometry_lod1.parameters()) - - self.params_to_train += list(self.sdf_network_lod1.parameters()) - self.params_to_train += list(self.variance_network_lod1.parameters()) - if self.rendering_network_lod1 is not None: - self.params_to_train += list(self.rendering_network_lod1.parameters()) - - return self.params_to_train - - def train_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['near_fars'][0, 0, :1], sample['near_fars'][0, 0, 1:] - - # the full-size ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - scale_mat = sample['scale_mat'] - trans_mat = sample['trans_mat'] - - # *********************** Lod==0 *********************** - if not self.if_fix_lod0_networks: - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs) - - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - else: - with torch.no_grad(): - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - # print("Checker2:, construct cost volume") - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # import ipdb; ipdb.set_trace() - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - # * extract depth maps for all the images - depth_maps_lod0, depth_masks_lod0 = None, None - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - if self.prune_depth_filter: - depth_maps_lod0_l4x, depth_masks_lod0_l4x = self.sdf_renderer_lod0.extract_depth_maps( - self.sdf_network_lod0, sdf_volume_lod0, intrinsics_l_4x, c2ws, - sizeH // 4, sizeW // 4, near * 1.5, far) - depth_maps_lod0 = F.interpolate(depth_maps_lod0_l4x, size=(sizeH, sizeW), mode='bilinear', - align_corners=True) - depth_masks_lod0 = F.interpolate(depth_masks_lod0_l4x.float(), size=(sizeH, sizeW), mode='nearest') - - # *************** losses - loss_lod0, losses_lod0, depth_statis_lod0 = None, None, None - - if not self.if_fix_lod0_networks: - - render_out = self.sdf_renderer_lod0.render( - rays_o, rays_d, near, far, - self.sdf_network_lod0, - self.rendering_network_lod0, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - if_general_rendering=True, - if_render_with_grad=True, - ) - - loss_lod0, losses_lod0, depth_statis_lod0 = self.cal_losses_sdf(render_out, sample_rays, - iter_step, lod=0) 
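As an aside on the pattern above: the deleted trainer extracts per-view depth at quarter resolution (the intrinsics are scaled by 0.25 to match the H/4 x W/4 images) and then upsamples the result back to full size, bilinearly for depth and with nearest-neighbour for the binary masks. A minimal, self-contained sketch of that pattern follows; the tensor sizes and the scale_intrinsics helper are illustrative, not part of the repository.

import torch
import torch.nn.functional as F

def scale_intrinsics(intrinsics, factor):
    # fx, 0, cx and 0, fy, cy sit in the first two rows of each K matrix,
    # so scaling those rows matches a resize of the image by `factor`
    scaled = intrinsics.clone()
    scaled[:, :2] *= factor
    return scaled

intrinsics = torch.eye(3)[None].repeat(2, 1, 1)       # dummy [N, 3, 3] intrinsics
intrinsics_l_4x = scale_intrinsics(intrinsics, 0.25)  # for images downsampled to H/4 x W/4

depth_low = torch.rand(2, 1, 64, 64)                  # depth predicted at quarter resolution
mask_low = (depth_low > 0.5).float()
depth_full = F.interpolate(depth_low, size=(256, 256), mode='bilinear', align_corners=True)
mask_full = F.interpolate(mask_low, size=(256, 256), mode='nearest')  # nearest keeps masks binary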
- - # *********************** Lod==1 *********************** - - loss_lod1, losses_lod1, depth_statis_lod1 = None, None, None - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - # geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - # ? It seems that training gru_fusion, this part should be trainable too - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - # if not self.if_gru_fusion_lod1: - render_out_lod1 = self.sdf_renderer_lod1.render( - rays_o, rays_d, near, far, - self.sdf_network_lod1, - self.rendering_network_lod1, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod1, - # * related to conditional feature - lod=1, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # * 2d feature maps - feature_maps=geometry_feature_maps_lod1, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - bg_ratio=self.bg_ratio, - ) - loss_lod1, losses_lod1, depth_statis_lod1 = self.cal_losses_sdf(render_out_lod1, sample_rays, - iter_step, lod=1) - - # print("Checker3:, compute losses") - # # - extract mesh - if iter_step % self.val_mesh_freq == 0: - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_lod0, - self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - # occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='train_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, - trans_mat=trans_mat) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_mesh(self.sdf_network_lod1, - self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, lod=1, - # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - mode='train_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, - trans_mat=trans_mat) - # import ipdb; ipdb.set_trace() - # print("Checker3.1:, after val mesh") - losses = { - # - lod 0 - 'loss_lod0': loss_lod0, - 'losses_lod0': losses_lod0, - 'depth_statis_lod0': depth_statis_lod0, - - # - lod 1 - 'loss_lod1': loss_lod1, - 'losses_lod1': losses_lod1, - 'depth_statis_lod1': depth_statis_lod1, - - } - - return losses - - def val_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # render_img_idx = sample['render_img_idx'][0] - # true_img = sample['images'][0][render_img_idx] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - scale_factor = sample['scale_factor'][0].cpu().numpy() - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - obtain conditional features - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # import ipdb; ipdb.set_trace() - # - lod 0 - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - depth_maps_lod0, depth_masks_lod0 = None, None - if self.prune_depth_filter: - depth_maps_lod0_l4x, depth_masks_lod0_l4x = self.sdf_renderer_lod0.extract_depth_maps( - self.sdf_network_lod0, sdf_volume_lod0, - intrinsics_l_4x, c2ws, - sizeH // 4, sizeW // 4, near * 1.5, far) # - near*1.5 is a experienced number - depth_maps_lod0 = F.interpolate(depth_maps_lod0_l4x, size=(sizeH, sizeW), mode='bilinear', - align_corners=True) - depth_masks_lod0 = F.interpolate(depth_masks_lod0_l4x.float(), size=(sizeH, sizeW), mode='nearest') - - #### visualize the depth_maps_lod0 for checking - colored_depth_maps_lod0 = [] - for i in range(depth_maps_lod0.shape[0]): - colored_depth_maps_lod0.append( - visualize_depth_numpy(depth_maps_lod0[i, 0].cpu().numpy(), [depth_min, depth_max])[0]) - - colored_depth_maps_lod0 = np.concatenate(colored_depth_maps_lod0, axis=0).astype(np.uint8) - os.makedirs(os.path.join(self.base_exp_dir, 
'depth_maps_lod0'), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'depth_maps_lod0', - '{:0>8d}_{}.png'.format(iter_step, meta)), - colored_depth_maps_lod0[:, :, ::-1]) - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - with torch.no_grad(): - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_fine_lod1 = [] - out_normal_fine_lod1 = [] - out_depth_fine_lod1 = [] - - # out_depth_fine_explicit = [] - if save_vis: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # ****** lod 0 **** - render_out = self.sdf_renderer_lod0.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_lod0, - self.rendering_network_lod0, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_render_with_grad=False, - ) - - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples_lod0 + self.n_importance_lod0, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples_lod0 + self.n_importance_lod0, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - # ****************** lod 1 ************************** - if self.num_lods > 1: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - render_out_lod1 = self.sdf_renderer_lod1.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_lod1, - self.rendering_network_lod1, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod1, - # * related to conditional feature - lod=1, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # * 2d feature maps - 
feature_maps=geometry_feature_maps_lod1, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_render_with_grad=False, - ) - - feasible = lambda key: ((key in render_out_lod1) and (render_out_lod1[key] is not None)) - - if feasible('depth'): - out_depth_fine_lod1.append(render_out_lod1['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine_lod1.append(render_out_lod1['color_fine'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out_lod1['inside_sphere'] is not None: - out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - :self.n_samples_lod1 + self.n_importance_lod1, - None] * - render_out_lod1['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - :self.n_samples_lod1 + self.n_importance_lod1, - None]).sum( - dim=1).detach().cpu().numpy()) - del render_out_lod1 - - # - save visualization of lod 0 - - self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - query_w2c[0], out_rgb_fine, H, W, - depth_min, depth_max, iter_step, meta, "val_lod0", true_depth=true_depth, scale_factor=scale_factor) - - if self.num_lods > 1: - self.save_visualization(true_img, true_depth_colored, out_depth_fine_lod1, out_normal_fine_lod1, - query_w2c[0], out_rgb_fine_lod1, H, W, - depth_min, depth_max, iter_step, meta, "val_lod1", true_depth=true_depth, scale_factor=scale_factor) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_lod0, - self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - # occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_mesh(self.sdf_network_lod1, - self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, lod=1, - # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - - - def export_mesh_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - # target_candidate_w2cs = sample['target_candidate_w2cs'][0] - proj_matrices = sample['affine_mats'] - - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - scale_factor = sample['scale_factor'][0].cpu().numpy() - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - obtain conditional features - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # - lod 0 - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - depth_maps_lod0, depth_masks_lod0 = None, None - - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - with torch.no_grad(): - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, 
- pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_fine_lod1 = [] - out_normal_fine_lod1 = [] - out_depth_fine_lod1 = [] - - # # out_depth_fine_explicit = [] - # if save_vis: - # for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # # ****** lod 0 **** - # render_out = self.sdf_renderer_lod0.render( - # rays_o_batch, rays_d_batch, near, far, - # self.sdf_network_lod0, - # self.rendering_network_lod0, - # background_rgb=background_rgb, - # alpha_inter_ratio=alpha_inter_ratio_lod0, - # # * related to conditional feature - # lod=0, - # conditional_volume=con_volume_lod0, - # conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # # * 2d feature maps - # feature_maps=geometry_feature_maps, - # color_maps=imgs, - # w2cs=w2cs, - # intrinsics=intrinsics, - # img_wh=[sizeW, sizeH], - # query_c2w=query_c2w, - # if_render_with_grad=False, - # ) - - # feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - # if feasible('depth'): - # out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # # if render_out['color_coarse'] is not None: - # if feasible('color_fine'): - # out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - # if feasible('gradients') and feasible('weights'): - # if render_out['inside_sphere'] is not None: - # out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - # :self.n_samples_lod0 + self.n_importance_lod0, - # None] * render_out['inside_sphere'][ - # ..., None]).sum(dim=1).detach().cpu().numpy()) - # else: - # out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - # :self.n_samples_lod0 + self.n_importance_lod0, - # None]).sum(dim=1).detach().cpu().numpy()) - # del render_out - - # # ****************** lod 1 ************************** - # if self.num_lods > 1: - # for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - # render_out_lod1 = self.sdf_renderer_lod1.render( - # rays_o_batch, rays_d_batch, near, far, - # self.sdf_network_lod1, - # self.rendering_network_lod1, - # background_rgb=background_rgb, - # alpha_inter_ratio=alpha_inter_ratio_lod1, - # # * related to conditional feature - # lod=1, - # conditional_volume=con_volume_lod1, - # conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # # * 2d feature maps - # feature_maps=geometry_feature_maps_lod1, - # color_maps=imgs, - # w2cs=w2cs, - # intrinsics=intrinsics, - # img_wh=[sizeW, sizeH], - # query_c2w=query_c2w, - # if_render_with_grad=False, - # ) - - # feasible = lambda key: ((key in render_out_lod1) and (render_out_lod1[key] is not None)) - - # if feasible('depth'): - # out_depth_fine_lod1.append(render_out_lod1['depth'].detach().cpu().numpy()) - - # # if render_out['color_coarse'] is not None: - # if feasible('color_fine'): - # out_rgb_fine_lod1.append(render_out_lod1['color_fine'].detach().cpu().numpy()) - # if feasible('gradients') and feasible('weights'): - # if render_out_lod1['inside_sphere'] is not None: - # out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - # :self.n_samples_lod1 + self.n_importance_lod1, - # None] * - # render_out_lod1['inside_sphere'][ - # ..., None]).sum(dim=1).detach().cpu().numpy()) - # else: - # out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - # 
:self.n_samples_lod1 + self.n_importance_lod1, - # None]).sum( - # dim=1).detach().cpu().numpy()) - # del render_out_lod1 - - # # - save visualization of lod 0 - - # self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - # query_w2c[0], out_rgb_fine, H, W, - # depth_min, depth_max, iter_step, meta, "val_lod0", true_depth=true_depth, scale_factor=scale_factor) - - # if self.num_lods > 1: - # self.save_visualization(true_img, true_depth_colored, out_depth_fine_lod1, out_normal_fine_lod1, - # query_w2c[0], out_rgb_fine_lod1, H, W, - # depth_min, depth_max, iter_step, meta, "val_lod1", true_depth=true_depth, scale_factor=scale_factor) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_colored_mesh( - density_or_sdf_network=self.sdf_network_lod0, - func_extract_geometry=self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume = con_valid_mask_volume_lod0, - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - target_candidate_w2cs=None, - intrinsics=intrinsics, - rendering_network=self.rendering_network_lod0, - rendering_projector=self.sdf_renderer_lod0.rendering_projector, - lod=0, - threshold=0, - query_c2w=query_c2w, - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat - ) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_colored_mesh( - density_or_sdf_network=self.sdf_network_lod1, - func_extract_geometry=self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume = con_valid_mask_volume_lod1, - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - target_candidate_w2cs=None, - intrinsics=intrinsics, - rendering_network=self.rendering_network_lod1, - rendering_projector=self.sdf_renderer_lod1.rendering_projector, - lod=1, - threshold=0, - query_c2w=query_c2w, - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat - ) - torch.cuda.empty_cache() - # self.validate_mesh(self.sdf_network_lod1, - # self.sdf_renderer_lod1.extract_geometry, - # conditional_volume=con_volume_lod1, lod=1, - # # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - # mode='val_bg', meta=meta, - # iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - # torch.cuda.empty_cache() - - - def save_visualization(self, true_img, true_colored_depth, out_depth, out_normal, w2cs, out_color, H, W, - depth_min, depth_max, iter_step, meta, comment, out_color_mlp=[], true_depth=None, scale_factor=1.0): - if len(out_color) > 0: - img_fine = (np.concatenate(out_color, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_color_mlp) > 0: - img_mlp = (np.concatenate(out_color_mlp, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_normal) > 0: - normal_img = np.concatenate(out_normal, axis=0) - rot = w2cs[:3, :3].detach().cpu().numpy() - # - convert normal from world space to camera space - normal_img = (np.matmul(rot[None, :, :], - normal_img[:, :, None]).reshape([H, W, 3]) * 128 + 128).clip(0, 255) - if len(out_depth) > 0: - pred_depth = np.concatenate(out_depth, axis=0).reshape([H, W]) - pred_depth_colored = visualize_depth_numpy(pred_depth, [depth_min, depth_max])[0] - - if len(out_depth) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'depths_' + comment), exist_ok=True) - if true_colored_depth is not None: - - if true_depth is not None: - depth_error_map = np.abs(true_depth 
- pred_depth) * 2.0 / scale_factor - # [256, 256, 1] -> [256, 256, 3] - depth_error_map = np.tile(depth_error_map[:, :, None], [1, 1, 3]) - print("meta: ", meta) - print("scale_factor: ", scale_factor) - print("depth_error_mean: ", depth_error_map.mean()) - # import ipdb; ipdb.set_trace() - depth_visualized = np.concatenate( - [(depth_error_map * 255).astype(np.uint8), true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] - # print("depth_visualized.shape: ", depth_visualized.shape) - # write depth error result text on img, the input is a numpy array of [256, 1024, 3] - # cv.putText(depth_visualized.copy(), "depth_error_mean: {:.4f}".format(depth_error_map.mean()), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) - else: - depth_visualized = np.concatenate( - [true_colored_depth, pred_depth_colored, true_img])[:, :, ::-1] - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), depth_visualized - ) - else: - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [pred_depth_colored, true_img])[:, :, ::-1]) - if len(out_color) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_fine, true_img])[:, :, ::-1]) # bgr2rgb - # compute psnr (image pixel lie in [0, 255]) - mse_loss = np.mean((img_fine - true_img) ** 2) - psnr = 10 * np.log10(255 ** 2 / mse_loss) - - print("PSNR: ", psnr) - - if len(out_color_mlp) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_mlp, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_normal) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'normals_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'normals_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - normal_img[:, :, ::-1]) - - def forward(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - mode='train', - save_vis=False, - ): - - if mode == 'train': - return self.train_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step - ) - elif mode == 'val': - import time - begin = time.time() - result = self.val_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step, - save_vis=save_vis, - ) - end = time.time() - print("val_step time: ", end - begin) - return result - elif mode == 'export_mesh': - import time - begin = time.time() - result = self.export_mesh_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step, - save_vis=save_vis, - ) - end = time.time() - print("export mesh time: ", end - begin) - return result - def obtain_pyramid_feature_maps(self, imgs, lod=0): - """ - get feature maps of all conditional 
images - :param imgs: - :return: - """ - - if lod == 0: - extractor = self.pyramid_feature_network_geometry_lod0 - elif lod >= 1: - extractor = self.pyramid_feature_network_geometry_lod1 - - pyramid_feature_maps = extractor(imgs) - - # * the pyramid features are very important, if only use the coarst features, hard to optimize - fused_feature_maps = torch.cat([ - F.interpolate(pyramid_feature_maps[0], scale_factor=4, mode='bilinear', align_corners=True), - F.interpolate(pyramid_feature_maps[1], scale_factor=2, mode='bilinear', align_corners=True), - pyramid_feature_maps[2] - ], dim=1) - - return fused_feature_maps - - def cal_losses_sdf(self, render_out, sample_rays, iter_step=-1, lod=0): - - # loss weight schedule; the regularization terms should be added in later training stage - def get_weight(iter_step, weight): - if lod == 1: - anneal_start = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = anneal_end * 2 - else: - anneal_start = self.anneal_start if lod == 0 else self.anneal_start_lod1 - anneal_end = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = anneal_end * 2 - - if iter_step < 0: - return weight - - if anneal_end == 0.0: - return weight - elif iter_step < anneal_start: - return 0.0 - else: - return np.min( - [1.0, - (iter_step - anneal_start) / (anneal_end - anneal_start)]) * weight - - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - true_rgb = sample_rays['rays_color'][0] - - if 'rays_depth' in sample_rays.keys(): - true_depth = sample_rays['rays_depth'][0] - else: - true_depth = None - mask = sample_rays['rays_mask'][0] - - color_fine = render_out['color_fine'] - color_fine_mask = render_out['color_fine_mask'] - depth_pred = render_out['depth'] - - variance = render_out['variance'] - cdf_fine = render_out['cdf_fine'] - weight_sum = render_out['weights_sum'] - - gradient_error_fine = render_out['gradient_error_fine'] - - sdf = render_out['sdf'] - - # * color generated by mlp - color_mlp = render_out['color_mlp'] - color_mlp_mask = render_out['color_mlp_mask'] - - if color_fine is not None: - # Color loss - color_mask = color_fine_mask if color_fine_mask is not None else mask - # import ipdb; ipdb.set_trace() - color_mask = color_mask[..., 0] - color_error = (color_fine[color_mask] - true_rgb[color_mask]) - # print("Nan number", torch.isnan(color_error).sum()) - # print("Color error shape", color_error.shape) - # import ipdb; ipdb.set_trace() - color_fine_loss = F.l1_loss(color_error, torch.zeros_like(color_error).to(color_error.device), - reduction='mean') - # print(color_fine_loss) - psnr = 20.0 * torch.log10( - 1.0 / (((color_fine[color_mask] - true_rgb[color_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_fine_loss = 0. - psnr = 0. - - if color_mlp is not None: - # Color loss - color_mlp_mask = color_mlp_mask[..., 0] - color_error_mlp = (color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) - color_mlp_loss = F.l1_loss(color_error_mlp, - torch.zeros_like(color_error_mlp).to(color_error_mlp.device), - reduction='mean') - - psnr_mlp = 20.0 * torch.log10( - 1.0 / (((color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_mlp_loss = 0. - psnr_mlp = 0. 
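The get_weight schedule above gates each regularization term with a linear warm-up over the annealing window. A standalone sketch of that schedule; the function name and the numbers in the loop are illustrative, not the config values.

import numpy as np

def annealed_weight(iter_step, weight, anneal_start, anneal_end):
    # off before anneal_start, linear ramp up to anneal_end, constant afterwards
    if anneal_end == 0.0:
        return weight          # annealing disabled
    if iter_step < anneal_start:
        return 0.0
    ramp = (iter_step - anneal_start) / (anneal_end - anneal_start)
    return float(np.clip(ramp, 0.0, 1.0)) * weight

for step in (0, 5000, 12500, 25000, 50000):
    print(step, annealed_weight(step, weight=0.02, anneal_start=0, anneal_end=25000))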
- - # depth loss is only used for inference, not included in total loss - if true_depth is not None: - # depth_loss = self.depth_criterion(depth_pred, true_depth, mask) - depth_loss = self.depth_criterion(depth_pred, true_depth) - - # # depth evaluation - # depth_statis = compute_depth_errors(depth_pred.detach().cpu().numpy(), true_depth.cpu().numpy()) - # depth_statis = numpy2tensor(depth_statis, device=rays_o.device) - depth_statis = None - else: - depth_loss = 0. - depth_statis = None - - sparse_loss_1 = torch.exp( - -1 * torch.abs(render_out['sdf_random']) * self.sdf_decay_param).mean() # - should equal - sparse_loss_2 = torch.exp(-1 * torch.abs(sdf) * self.sdf_decay_param).mean() - sparse_loss = (sparse_loss_1 + sparse_loss_2) / 2 - - sdf_mean = torch.abs(sdf).mean() - sparseness_1 = (torch.abs(sdf) < 0.01).to(torch.float32).mean() - sparseness_2 = (torch.abs(sdf) < 0.02).to(torch.float32).mean() - - # Eikonal loss - gradient_error_loss = gradient_error_fine - - # ! the first 50k, don't use bg constraint - fg_bg_weight = 0.0 if iter_step < 50000 else get_weight(iter_step, self.fg_bg_weight) - - # Mask loss, optional - # The images of DTU dataset contain large black regions (0 rgb values), - # can use this data prior to make fg more clean - background_loss = 0.0 - fg_bg_loss = 0.0 - if self.fg_bg_weight > 0 and torch.mean((mask < 0.5).to(torch.float32)) > 0.02: - weights_sum_fg = render_out['weights_sum_fg'] - fg_bg_error = (weights_sum_fg - mask)[mask < 0.5] - fg_bg_loss = F.l1_loss(fg_bg_error, - torch.zeros_like(fg_bg_error).to(fg_bg_error.device), - reduction='mean') - - - - loss = 1.0 * depth_loss + color_fine_loss + color_mlp_loss + \ - sparse_loss * get_weight(iter_step, self.sdf_sparse_weight) + \ - fg_bg_loss * fg_bg_weight + \ - gradient_error_loss * self.sdf_igr_weight # ! 
gradient_error_loss need a mask - - losses = { - "loss": loss, - "depth_loss": depth_loss, - "color_fine_loss": color_fine_loss, - "color_mlp_loss": color_mlp_loss, - "gradient_error_loss": gradient_error_loss, - "background_loss": background_loss, - "sparse_loss": sparse_loss, - "sparseness_1": sparseness_1, - "sparseness_2": sparseness_2, - "sdf_mean": sdf_mean, - "psnr": psnr, - "psnr_mlp": psnr_mlp, - "weights_sum": render_out['weights_sum'], - "weights_sum_fg": render_out['weights_sum_fg'], - "alpha_sum": render_out['alpha_sum'], - "variance": render_out['variance'], - "sparse_weight": get_weight(iter_step, self.sdf_sparse_weight), - "fg_bg_weight": fg_bg_weight, - "fg_bg_loss": fg_bg_loss, # added by jha, bug of sparseNeuS - } - # print("[TEST]: weights_sum in trainner forward", losses['weights_sum'].mean()) - losses = numpy2tensor(losses, device=rays_o.device) - return loss, losses, depth_statis - - @torch.no_grad() - def validate_mesh(self, density_or_sdf_network, func_extract_geometry, world_space=True, resolution=360, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - density_or_sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - occupancy_mask=occupancy_mask - ) - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: # w2c_ref_inv - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - - mesh = trimesh.Trimesh(vertices, triangles) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) - - - - def validate_colored_mesh(self, density_or_sdf_network, func_extract_geometry, world_space=True, resolution=360, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, - conditional_valid_mask_volume=None, - feature_maps=None, - color_maps = None, - w2cs=None, - target_candidate_w2cs=None, - intrinsics=None, - rendering_network=None, - rendering_projector=None, - query_c2w=None, - lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - density_or_sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - occupancy_mask=occupancy_mask - ) - - - with torch.no_grad(): - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask, _, _ = rendering_projector.compute_view_independent( - torch.tensor(vertices).to(conditional_volume), - lod=0, - # * 3d geometry feature volumes - 
geometryVolume=conditional_volume[0], - geometryVolumeMask=conditional_valid_mask_volume[0], - sdf_network=density_or_sdf_network, - # * 2d rendering feature maps - rendering_feature_maps=feature_maps, # [n_view, 56, 256, 256] - color_maps=color_maps, - w2cs=w2cs, - target_candidate_w2cs=target_candidate_w2cs, - intrinsics=intrinsics, - img_wh=[256,256], - query_img_idx=0, # the index of the N_views dim for rendering - query_c2w=query_c2w, - ) - - - vertices_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: # w2c_ref_inv - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - # import ipdb; ipdb.set_trace() - vertices_color = np.array(vertices_color.squeeze(0).cpu() * 255, dtype=np.uint8) - mesh = trimesh.Trimesh(vertices, triangles, vertex_colors=vertices_color) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod)), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod), - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) \ No newline at end of file diff --git a/SparseNeuS_demo_v1/ops/generate_grids.py b/SparseNeuS_demo_v1/ops/generate_grids.py index 884c37793131323c566c6d1a738f06d497bbd2fb..304c1c4c1a424c4bc219f39815ed43fea1d9de5d 100644 --- a/SparseNeuS_demo_v1/ops/generate_grids.py +++ b/SparseNeuS_demo_v1/ops/generate_grids.py @@ -12,7 +12,7 @@ def generate_grid(n_vox, interval): with torch.no_grad(): # Create voxel grid grid_range = [torch.arange(0, n_vox[axis], interval) for axis in range(3)] - grid = torch.stack(torch.meshgrid(grid_range[0], grid_range[1], grid_range[2])) # 3 dx dy dz + grid = torch.stack(torch.meshgrid(grid_range[0], grid_range[1], grid_range[2], indexing="ij")) # 3 dx dy dz # ! 
don't create tensor on gpu; imbalanced gpu memory in ddp mode grid = grid.unsqueeze(0).type(torch.float32) # 1 3 dx dy dz diff --git a/SparseNeuS_demo_v1/utils/training_utils.py b/SparseNeuS_demo_v1/utils/training_utils.py deleted file mode 100644 index 5d128ba2beda39b708850bd4c17c4603a8a17848..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/utils/training_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -import numpy as np -import torchvision.utils as vutils -import torch, random -import torch.nn.functional as F - - -# print arguments -def print_args(args): - print("################################ args ################################") - for k, v in args.__dict__.items(): - print("{0: <10}\t{1: <30}\t{2: <20}".format(k, str(v), str(type(v)))) - print("########################################################################") - - -# torch.no_grad warpper for functions -def make_nograd_func(func): - def wrapper(*f_args, **f_kwargs): - with torch.no_grad(): - ret = func(*f_args, **f_kwargs) - return ret - - return wrapper - - -# convert a function into recursive style to handle nested dict/list/tuple variables -def make_recursive_func(func): - def wrapper(vars, device=None): - if isinstance(vars, list): - return [wrapper(x, device) for x in vars] - elif isinstance(vars, tuple): - return tuple([wrapper(x, device) for x in vars]) - elif isinstance(vars, dict): - return {k: wrapper(v, device) for k, v in vars.items()} - else: - return func(vars, device) - - return wrapper - - -@make_recursive_func -def tensor2float(vars): - if isinstance(vars, float): - return vars - elif isinstance(vars, torch.Tensor): - return vars.data.item() - else: - raise NotImplementedError("invalid input type {} for tensor2float".format(type(vars))) - - -@make_recursive_func -def tensor2numpy(vars): - if isinstance(vars, np.ndarray): - return vars - elif isinstance(vars, torch.Tensor): - return vars.detach().cpu().numpy().copy() - else: - raise NotImplementedError("invalid input type {} for tensor2numpy".format(type(vars))) - - -@make_recursive_func -def numpy2tensor(vars, device='cpu'): - if not isinstance(vars, torch.Tensor) and vars is not None : - return torch.tensor(vars, device=device) - elif isinstance(vars, torch.Tensor): - return vars - elif vars is None: - return vars - else: - raise NotImplementedError("invalid input type {} for float2tensor".format(type(vars))) - - -@make_recursive_func -def tocuda(vars, device='cuda'): - if isinstance(vars, torch.Tensor): - return vars.to(device) - elif isinstance(vars, str): - return vars - else: - raise NotImplementedError("invalid input type {} for tocuda".format(type(vars))) - - -import torch.distributed as dist - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - dist.barrier() - - -def get_world_size(): - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def reduce_scalar_outputs(scalar_outputs): - world_size = get_world_size() - if world_size < 2: - return scalar_outputs - with torch.no_grad(): - names = [] - scalars = [] - for k in sorted(scalar_outputs.keys()): - names.append(k) - if isinstance(scalar_outputs[k], torch.Tensor): - scalars.append(scalar_outputs[k]) - else: - scalars.append(torch.tensor(scalar_outputs[k], device='cuda')) - scalars = 
torch.stack(scalars, dim=0) - dist.reduce(scalars, dst=0) - if dist.get_rank() == 0: - # only main process gets accumulated, so only divide by - # world_size in this case - scalars /= world_size - reduced_scalars = {k: v for k, v in zip(names, scalars)} - - return reduced_scalars diff --git a/SparseNeuS_demo_v1/weights/ckpt.pth b/SparseNeuS_demo_v1/weights/ckpt.pth deleted file mode 100644 index ea22ffa970c253e2f1d6cccbe195f703027264f6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/weights/ckpt.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee9a0027096b3f4f304e2801ebe41545241f974f7d812dc802ac70c8aeeab2b2 -size 6859767 diff --git a/sam_utils.py b/sam_utils.py index 78e56f4ef6645c336fba911b2d7cc7523cfca0d7..fe6371910204a4b1826261c2eed450bfdb9244cf 100644 --- a/sam_utils.py +++ b/sam_utils.py @@ -7,16 +7,12 @@ import time from segment_anything import sam_model_registry, SamPredictor def sam_init(device_id=0): - import inspect - dir_path = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) - sam_checkpoint = os.path.join(dir_path, "sam_vit_h_4b8939.pth") + sam_checkpoint = os.path.join(os.path.dirname(__file__), "sam_vit_h_4b8939.pth") model_type = "vit_h" device = "cuda:{}".format(device_id) if torch.cuda.is_available() else "cpu" - sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) - sam.to(device=device) + sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device=device) predictor = SamPredictor(sam) return predictor diff --git a/utils.py b/utils.py index 34e9627669d3f8976dcdeb7f9b9b2a41e00c7441..5549ce1de43f3f0d6e385506941852e503a8184d 100644 --- a/utils.py +++ b/utils.py @@ -1,84 +1,9 @@ -import os, json +import os +import json import numpy as np -import base64 -# import matplotlib.pyplot as plt import cv2 from PIL import Image -def image_grid(imgs, rows, cols): - assert len(imgs) == rows*cols - w, h = imgs[0].size - grid = Image.new('RGB', size=(cols*w, rows*h)) - grid_w, grid_h = grid.size - - for i, img in enumerate(imgs): - grid.paste(img, box=(i%cols*w, i//cols*h)) - return grid -def tensor2img(tensor): - return Image.fromarray((tensor.detach().cpu().numpy().transpose(1,2,0)*255).astype("uint8")) -def titled_image(img, title="main"): - # add caption to raw_im - from PIL import ImageDraw, ImageFont - titled_image = img.copy() - draw = ImageDraw.Draw(titled_image) - import cv2 - font_path = os.path.join(cv2.__path__[0],'qt','fonts','DejaVuSans.ttf') - font = ImageFont.truetype(font_path, size=20) - draw.text((0, 0), title, fill=(255, 0, 0), font=font) - # show the drawed image - return titled_image - -def find_image_file(shape_dir): - image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.svg', '.webp'] - processed_images = ['image_sam.png', 'input_256.png', "input_256_rgba.png"] - image_files = [file for file in os.listdir(shape_dir) if os.path.splitext(file)[1].lower() in image_extensions and file not in processed_images] - return image_files[0] - -def encode_image(filepath): - with open(filepath, 'rb') as f: - image_bytes = f.read() - encoded = str(base64.b64encode(image_bytes), 'utf-8') - return "data:image/jpg;base64,"+encoded - - -# contrast correction, rescale and recenter -def image_preprocess(shape_dir, lower_contrast=True, rescale=True): - nickname = shape_dir.split("/")[-1] - img_path = os.path.join(shape_dir, "image_sam.png") - out_path = os.path.join(shape_dir, "input_256.png") - out_path_rgba = os.path.join(shape_dir, 
"input_256_rgba.png") - image = Image.open(img_path) #[:,90:550] - # print(image.size) - image_arr = np.array(image) - in_w, in_h = image_arr.shape[:2] - - if lower_contrast: - alpha = 0.8 # Contrast control (1.0-3.0) - beta = 0 # Brightness control (0-100) - # Apply the contrast adjustment - image_arr = cv2.convertScaleAbs(image_arr, alpha=alpha, beta=beta) - image_arr[image_arr[...,-1]>200, -1] = 255 - - ret, mask = cv2.threshold(np.array(image.split()[-1]), 0, 255, cv2.THRESH_BINARY) - x, y, w, h = cv2.boundingRect(mask) - max_size = max(w, h) - print(nickname, max_size/np.max(image.size)) - ratio = 0.75 - if rescale: - side_len = int(max_size / ratio) - else: - side_len = in_w - padded_image = np.zeros((side_len, side_len, 4), dtype=np.uint8) - center = side_len//2 - padded_image[center-h//2:center-h//2+h, center-w//2:center-w//2+w] = image_arr[y:y+h, x:x+w] - rgba = Image.fromarray(padded_image).resize((256, 256), Image.LANCZOS) - rgba.save(out_path_rgba) - - rgba_arr = np.array(rgba) / 255.0 - rgb = rgba_arr[...,:3] * rgba_arr[...,-1:] + (1 - rgba_arr[...,-1:]) - rgb = Image.fromarray((rgb * 255).astype(np.uint8)) - rgb.save(out_path) - # contrast correction, rescale and recenter def image_preprocess_nosave(input_image, lower_contrast=True, rescale=True): @@ -159,9 +84,7 @@ def get_poses(init_elev): [view_theta + source for view_theta in eyelevel_theta for source in source_theta_delta]) return img_ids, calc_pose(elevations, azimuths, len(azimuths)).cpu().numpy() -# eval_path = "/objaverse-processed/zero12345_img/%s" % dataset -# for shape in os.listdir(eval_path): -# shape_dir = os.path.join(eval_path, shape) + def gen_poses(shape_dir, pose_est): img_ids, input_poses = get_poses(pose_est) @@ -178,4 +101,3 @@ def gen_poses(shape_dir, pose_est): json_path = os.path.join(shape_dir, 'pose.json') with open(json_path, 'w') as f: json.dump(out_dict, f, indent=4) - # break diff --git a/zero123_utils.py b/zero123_utils.py index 929ecd318eb984f7425356c67357ca8e59755d3b..b3ad274d47c87065ed576e1cfb803c4f741c89e6 100644 --- a/zero123_utils.py +++ b/zero123_utils.py @@ -9,7 +9,7 @@ from ldm.models.diffusion.ddim import DDIMSampler from omegaconf import OmegaConf from PIL import Image from rich import print -from transformers import AutoFeatureExtractor +from transformers import CLIPImageProcessor from torch import autocast from torchvision import transforms @@ -35,28 +35,21 @@ def load_model_from_config(config, ckpt, device, verbose=False): def init_model(device, ckpt): - import inspect - dir_path = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) - config = os.path.join(dir_path, 'configs/sd-objaverse-finetune-c_concat-256.yaml') - + config = os.path.join(os.path.dirname(__file__), 'configs/sd-objaverse-finetune-c_concat-256.yaml') config = OmegaConf.load(config) # Instantiate all models beforehand for efficiency. 
models = dict() print('Instantiating LatentDiffusion...') - models['turncam'] = load_model_from_config(config, ckpt, device=device) - # print('Instantiating Carvekit HiInterface...') - # models['carvekit'] = create_carvekit_interface() + models['turncam'] = torch.compile(load_model_from_config(config, ckpt, device=device)) print('Instantiating StableDiffusionSafetyChecker...') models['nsfw'] = StableDiffusionSafetyChecker.from_pretrained( 'CompVis/stable-diffusion-safety-checker').to(device) - print('Instantiating AutoFeatureExtractor...') - models['clip_fe'] = AutoFeatureExtractor.from_pretrained( - 'CompVis/stable-diffusion-safety-checker') + models['clip_fe'] = CLIPImageProcessor.from_pretrained( + "openai/clip-vit-large-patch14") # We multiply all by some factor > 1 to make them less likely to be triggered. - models['nsfw'].concept_embeds_weights *= 1.07 - models['nsfw'].special_care_embeds_weights *= 1.07 + models['nsfw'].concept_embeds_weights *= 1.2 + models['nsfw'].special_care_embeds_weights *= 1.2 return models @@ -74,7 +67,6 @@ def sample_model_batch(model, sampler, input_im, xs, ys, n_samples=4, precision= c = model.cc_projection(c) cond = {} cond['c_crossattn'] = [c] - # c_concat = model.encode_first_stage((input_im.to(c.device))).mode().detach() cond['c_concat'] = [model.encode_first_stage(input_im).mode().detach() .repeat(n_samples, 1, 1, 1)] if scale != 1.0: @@ -101,30 +93,6 @@ def sample_model_batch(model, sampler, input_im, xs, ys, n_samples=4, precision= del cond, c, x_samples_ddim, samples_ddim, uc, input_im torch.cuda.empty_cache() return ret_imgs - - -def predict_stage1(model, sampler, input_img_path, save_path_8, adjust_set=[], device="cuda"): - raw_im = Image.open(input_img_path) - # raw_im = raw_im.resize([256, 256], Image.LANCZOS) - # input_im_init = preprocess_image(models, raw_im, preprocess=False) - input_im_init = np.asarray(raw_im, dtype=np.float32) / 255.0 - input_im = transforms.ToTensor()(input_im_init).unsqueeze(0).to(device) - input_im = input_im * 2 - 1 - - # stage 1: 8 - delta_x_1_8 = [0] * 4 + [30] * 4 + [-30] * 4 - delta_y_1_8 = [0+90*(i%4) if i < 4 else 30+90*(i%4) for i in range(8)] + [30+90*(i%4) for i in range(4)] - - - x_samples_ddims_8 = sample_model_batch(model, sampler, input_im, delta_x_1_8, delta_y_1_8, n_samples=len(delta_x_1_8)) - for stage1_idx in range(len(x_samples_ddims_8)): - if adjust_set != [] and stage1_idx not in adjust_set: - continue - x_sample = 255.0 * rearrange(x_samples_ddims_8[stage1_idx].numpy(), 'c h w -> h w c') - Image.fromarray(x_sample.astype(np.uint8)).save(os.path.join(save_path_8, '%d.png'%(stage1_idx))) - del x_samples_ddims_8 - del input_im - torch.cuda.empty_cache() @torch.no_grad() def predict_stage1_gradio(model, raw_im, save_path = "", adjust_set=[], device="cuda", ddim_steps=75, scale=3.0): @@ -162,9 +130,6 @@ def predict_stage1_gradio(model, raw_im, save_path = "", adjust_set=[], device=" torch.cuda.empty_cache() return ret_imgs - - - def infer_stage_2(model, save_path_stage1, save_path_stage2, delta_x_2, delta_y_2, indices, device, ddim_steps=75, scale=3.0): for stage1_idx in indices: # save stage 1 image @@ -192,7 +157,6 @@ def infer_stage_2(model, save_path_stage1, save_path_stage2, delta_x_2, delta_y_ torch.cuda.empty_cache() def zero123_infer(model, input_dir_path, start_idx=0, end_idx=12, indices=None, device="cuda", ddim_steps=75, scale=3.0): - # input_dir_path = "/objaverse-processed/zero12345_img/eval/teddy_wild" # input_img_path = os.path.join(input_dir_path, "input_256.png") save_path_8 = 
os.path.join(input_dir_path, "stage1_8") save_path_8_2 = os.path.join(input_dir_path, "stage2_8")
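For reference, a hedged sketch of how the CLIPImageProcessor introduced in init_model pairs with the StableDiffusionSafetyChecker at inference time. The call convention (images as a float array in [0, 1], clip_input as preprocessed pixel values) follows the standard diffusers pipeline usage and is an assumption here, not code from this repository; the black dummy image is purely illustrative.

import numpy as np
import torch
from PIL import Image
from transformers import CLIPImageProcessor
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

device = 'cuda' if torch.cuda.is_available() else 'cpu'
clip_fe = CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14')
nsfw = StableDiffusionSafetyChecker.from_pretrained('CompVis/stable-diffusion-safety-checker').to(device)

pil_images = [Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))]       # stand-ins for sampled views
clip_input = clip_fe(pil_images, return_tensors='pt').pixel_values.to(device)
np_images = np.stack([np.asarray(im, dtype=np.float32) / 255.0 for im in pil_images])
checked_images, has_nsfw = nsfw(images=np_images, clip_input=clip_input)      # flagged images come back blacked out
print(has_nsfw)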