diff --git a/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf b/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf deleted file mode 100644 index dacbc09968c2f4cd6f7348dd93552ea5d8876236..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf +++ /dev/null @@ -1,137 +0,0 @@ -# - for the lod1 geometry network, using adaptive cost for sparse cost regularization network -#- for lod1 rendering network, using depth-adaptive render - -general { - base_exp_dir = ./exp/val/1_4_only_narrow_lod1 - - recording = [ - ./, - ./data - ./ops - ./models - ./loss - ] -} - -dataset { - # local path - trainpath = /objaverse-processed/zero12345_img/eval_selected - valpath = /objaverse-processed/zero12345_img/eval_selected - testpath = /objaverse-processed/zero12345_img/eval_selected - # trainpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - # valpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - # testpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/ - imgScale_train = 1.0 - imgScale_test = 1.0 - nviews = 5 - clean_image = True - importance_sample = True - test_ref_views = [23] - - # test dataset - test_n_views = 2 - test_img_wh = [256, 256] - test_clip_wh = [0, 0] - test_scan_id = scan110 - train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] # - test_img_idx = [51, 55, 57] #[32, 33, 34] # - - test_dir_comment = train -} - -train { - learning_rate = 2e-4 - learning_rate_milestone = [100000, 150000, 200000] - learning_rate_factor = 0.5 - end_iter = 200000 - save_freq = 5000 - val_freq = 1 - val_mesh_freq =1 - report_freq = 100 - - N_rays = 512 - - validate_resolution_level = 4 - anneal_start = 0 - anneal_end = 25000 - anneal_start_lod1 = 0 - anneal_end_lod1 = 15000 - - use_white_bkgd = True - - # Loss - # ! 
for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization - sdf_igr_weight = 0.1 - sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network - sdf_decay_param = 100 # cannot be too large, which decide the tsdf range - fg_bg_weight = 0.01 # first 0.01 - bg_ratio = 0.3 - - if_fix_lod0_networks = True -} - -model { - num_lods = 2 - - sdf_network_lod0 { - lod = 0, - ch_in = 56, # the channel num of fused pyramid features - voxel_size = 0.02105263, # 0.02083333, should be 2/95 - vol_dims = [96, 96, 96], - hidden_dim = 128, - cost_type = variance_mean - d_pyramid_feature_compress = 16, - regnet_d_out = 16, - num_sdf_layers = 4, - # position embedding - multires = 6 - } - - - sdf_network_lod1 { - lod = 1, - ch_in = 56, # the channel num of fused pyramid features - voxel_size = 0.0104712, #0.01041667, should be 2/191 - vol_dims = [192, 192, 192], - hidden_dim = 128, - cost_type = variance_mean - d_pyramid_feature_compress = 8, - regnet_d_out = 8, - num_sdf_layers = 4, - # position embedding - multires = 6 - } - - - variance_network { - init_val = 0.2 - } - - variance_network_lod1 { - init_val = 0.2 - } - - rendering_network { - in_geometry_feat_ch = 16 - in_rendering_feat_ch = 56 - anti_alias_pooling = True - } - - rendering_network_lod1 { - in_geometry_feat_ch = 8 - in_rendering_feat_ch = 56 - anti_alias_pooling = True - - } - - - trainer { - n_samples_lod0 = 64 - n_importance_lod0 = 64 - n_samples_lod1 = 64 - n_importance_lod1 = 64 - n_outside = 0 # 128 if render_outside_uniform_sampling - perturb = 1.0 - alpha_type = div - } -} diff --git a/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf b/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf index 7be6d4098d66473f63252c42d0a1bd25e2338a6b..e591ac038b854140efc81cdad3c8dc7838f03a83 100644 --- a/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf +++ b/SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf @@ -18,8 +18,6 @@ dataset { valpath = ../ # !!! where you store the validation data testpath = ../ - - imgScale_train = 1.0 imgScale_test = 1.0 nviews = 5 diff --git a/SparseNeuS_demo_v1/data/__init__.py b/SparseNeuS_demo_v1/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/SparseNeuS_demo_v1/data/blender.py b/SparseNeuS_demo_v1/data/blender.py deleted file mode 100644 index c027f3e05367497c91026b362af4378fe31ff24a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender.py +++ /dev/null @@ -1,340 +0,0 @@ -import torch -from torch.utils.data import Dataset -import json -import numpy as np -import os -from PIL import Image -from torchvision import transforms as T -from kornia import create_meshgrid -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import cv2 as cv -from data.scene import get_boundingbox - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def get_rays(directions, c2w): - """ - Get ray origin and normalized directions in world coordinate for all pixels in one image. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - directions: (H, W, 3) precomputed ray directions in camera coordinate - c2w: (3, 4) transformation matrix from camera coordinate to world coordinate - Outputs: - rays_o: (H*W, 3), the origin of the rays in world coordinate - rays_d: (H*W, 3), the normalized direction of the rays in world coordinate - """ - # Rotate ray directions from camera coordinate to the world coordinate - rays_d = directions @ c2w[:3, :3].T # (H, W, 3) - # rays_d = rays_d / torch.norm(rays_d, dim=-1, keepdim=True) - # The origin of all rays is the camera origin in world coordinate - rays_o = c2w[:3, 3].expand(rays_d.shape) # (H, W, 3) - - rays_d = rays_d.view(-1, 3) - rays_o = rays_o.view(-1, 3) - - return rays_o, rays_d - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -class BlenderDataset(Dataset): - def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[], - img_wh=[800, 800], clip_wh=[0, 0], original_img_wh=[800, 800], - N_rays=512, h_patch_size=5, near=2.0, far=6.0): - self.root_dir = root_dir - self.split = split - self.img_wh = img_wh - self.clip_wh = clip_wh - self.define_transforms() - self.train_img_idx = train_img_idx - self.test_img_idx = test_img_idx - self.N_rays = N_rays - self.h_patch_size = h_patch_size # used to extract patch for supervision - self.n_views = n_views - self.near, self.far = near, far - self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) - - with open(os.path.join(self.root_dir, f"transforms_{self.split}.json"), 'r') as f: - self.meta = json.load(f) - - - self.read_meta(near, far) - # import ipdb; ipdb.set_trace() - self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.meta['frames']))]) - - - # ! 
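Side note: `load_K_Rt_from_P` (above, and duplicated in every loader deleted below) recovers the intrinsics and a cam2world pose from a 3x4 projection matrix via `cv2.decomposeProjectionMatrix`. A minimal round-trip sketch, using only numpy/OpenCV and made-up K/R/t values, illustrating why the returned pose equals the inverse of the world-to-camera matrix:

```python
import numpy as np
import cv2

# Build a synthetic projection matrix P = K [R | t] and check that
# cv2.decomposeProjectionMatrix recovers K and the cam2world pose,
# mirroring what load_K_Rt_from_P returns.  All values are made up.
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
angle = np.deg2rad(30.0)
R = np.array([[np.cos(angle), -np.sin(angle), 0.0],
              [np.sin(angle),  np.cos(angle), 0.0],
              [0.0, 0.0, 1.0]])          # world-to-camera rotation
t = np.array([[0.1], [-0.2], [2.0]])     # world-to-camera translation
P = K @ np.hstack([R, t])                # 3x4 projection matrix

out = cv2.decomposeProjectionMatrix(P)
K_rec, R_rec, t_rec = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]

pose = np.eye(4)
pose[:3, :3] = R_rec.T                      # cam2world rotation
pose[:3, 3] = (t_rec[:3] / t_rec[3])[:, 0]  # camera center in world coords

# The recovered pose should match the inverse of [R | t].
w2c = np.eye(4)
w2c[:3, :3] = R
w2c[:3, 3] = t[:, 0]
assert np.allclose(pose, np.linalg.inv(w2c), atol=1e-6)
assert np.allclose(K_rec, K, atol=1e-6)
```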
estimate scale_mat - self.scale_mat, self.scale_factor = self.cal_scale_mat( - img_hw=[self.img_wh[1], self.img_wh[0]], - intrinsics=self.all_intrinsics[self.train_img_idx], - extrinsics=self.all_w2cs[self.train_img_idx], - near_fars=self.raw_near_fars[self.train_img_idx], - factor=1.1) - # self.scale_mat = np.eye(4) - # self.scale_factor = 1.0 - # import ipdb; ipdb.set_trace() - # * after scaling and translation, unit bounding box - self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \ - self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info() - - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - self.white_back = True - - def read_meta(self, near=2.0, far=6.0): - - - self.ref_img_idx = self.train_img_idx[0] - ref_c2w = np.array(self.meta['frames'][self.ref_img_idx]['transform_matrix']) @ self.blender2opencv - # ref_c2w = torch.FloatTensor(ref_c2w) - self.ref_c2w = ref_c2w - self.ref_w2c = np.linalg.inv(ref_c2w) - - - w, h = self.img_wh - self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x']) # original focal length - self.focal *= self.img_wh[0] / 800 # modify focal length to match size self.img_wh - - # bounds, common for all scenes - self.near = near - self.far = far - self.bounds = np.array([self.near, self.far]) - - # ray directions for all pixels, same for all images (same H, W, focal) - self.directions = get_ray_directions(h, w, [self.focal,self.focal]) # (h, w, 3) - intrinsics = np.eye(4) - intrinsics[:3, :3] = np.array([[self.focal,0,w/2],[0,self.focal,h/2],[0,0,1]]).astype(np.float32) - self.intrinsics = intrinsics - - self.image_paths = [] - self.poses = [] - self.all_rays = [] - self.all_images = [] - self.all_masks = [] - self.all_w2cs = [] - self.all_intrinsics = [] - for frame in self.meta['frames']: - pose = np.array(frame['transform_matrix']) @ self.blender2opencv - self.poses += [pose] - c2w = torch.FloatTensor(pose) - w2c = np.linalg.inv(c2w) - image_path = os.path.join(self.root_dir, f"{frame['file_path']}.png") - self.image_paths += [image_path] - img = Image.open(image_path) - img = img.resize(self.img_wh, Image.LANCZOS) - img = self.transform(img) # (4, h, w) - - self.all_masks += [img[-1:,:]>0] - # img = img[:3, :] * img[ -1:,:] + (1 - img[-1:, :]) # blend A to RGB - img = img[:3, :] * img[ -1:,:] - img = img.numpy() # (3, h, w) - self.all_images += [img] - - - self.all_masks += [] - self.all_intrinsics.append(self.intrinsics) - # - transform from world system to ref-camera system - self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c)) - - self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32) - self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32) - self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32) - # self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2], - # self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]] - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def scale_cam_info(self): - new_intrinsics = [] - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - for idx in range(len(self.all_images)): - - intrinsics = self.all_intrinsics[idx] - # import ipdb; ipdb.set_trace() - P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat - P = P.cpu().numpy()[:3, :4] - - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - new_intrinsics.append(intrinsics) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \ - np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \ - np.stack(new_affine_mats), np.stack(new_near_fars) - - new_intrinsics = torch.from_numpy(np.float32(new_intrinsics)) - new_w2cs = torch.from_numpy(np.float32(new_w2cs)) - new_c2ws = torch.from_numpy(np.float32(new_c2ws)) - new_affine_mats = torch.from_numpy(np.float32(new_affine_mats)) - new_near_fars = torch.from_numpy(np.float32(new_near_fars)) - - return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars - - def load_poses_all(self, file=f"transforms_train.json"): - with open(os.path.join(self.root_dir, file), 'r') as f: - meta = json.load(f) - - c2ws = [] - for i,frame in enumerate(meta['frames']): - c2ws.append(np.array(frame['transform_matrix']) @ self.blender2opencv) - return np.stack(c2ws) - - def define_transforms(self): - self.transform = T.ToTensor() - - - - def get_conditional_sample(self): - sample = {} - support_idxs = self.train_img_idx - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4) - sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2) - sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! in world space - - # sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - - return sample - - - - def __len__(self): - if self.split == 'train': - return self.n_views * 1000 - else: - return len(self.test_img_idx) * 1000 - - - def __getitem__(self, idx): - sample = {} - - if self.split == 'train': - render_idx = self.train_img_idx[idx % self.n_views] - support_idxs = [idx for idx in self.train_img_idx if idx != render_idx] - else: - # render_idx = idx % self.n_test_images + self.n_train_images - render_idx = self.test_img_idx[idx % len(self.test_img_idx)] - support_idxs = [render_idx] - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4) - sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! 
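The `affine_mat` built in `scale_cam_info()` stacks `K[:3,:3] @ w2c[:3,:4]`, i.e. a full 3x4 projection matrix per view; presumably it is what the model uses to gather source-view features. A tiny numeric sketch (hypothetical K and pose) of how it maps a world point to pixel coordinates:

```python
import numpy as np

# Hypothetical numbers: project a world point with the per-view
# affine_mat = K[:3,:3] @ w2c[:3,:4] built in scale_cam_info().
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 2.0]               # camera 2 units from the origin

affine_mat = np.eye(4)
affine_mat[:3, :4] = K[:3, :3] @ w2c[:3, :4]

X_world = np.array([0.1, -0.1, 0.0, 1.0])  # homogeneous world point
uvw = affine_mat[:3, :4] @ X_world
u, v = uvw[:2] / uvw[2]                    # perspective divide -> pixel coords
print(u, v)                                # 142.0, 114.0 (near the principal point)
```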
in world space - # sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['img_index'] = torch.tensor(render_idx) - - # - query image - sample['query_image'] = self.all_images[render_idx] - sample['query_c2w'] = self.scaled_c2ws[render_idx] - sample['query_w2c'] = self.scaled_w2cs[render_idx] - sample['query_intrinsic'] = self.scaled_intrinsics[render_idx] - sample['query_near_far'] = self.scaled_near_fars[render_idx] - # sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx]) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx] - sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32)) - - # - generate rays - if self.split == 'val' or self.split == 'test': - sample_rays = gen_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None) - else: - sample_rays = gen_random_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None, - dilated_mask=None, - importance_sample=False, - h_patch_size=self.h_patch_size - ) - - sample['rays'] = sample_rays - - return sample \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/blender_general.py b/SparseNeuS_demo_v1/data/blender_general.py deleted file mode 100644 index 871bcd6e9e2542110213e34ac5e7bde97184d938..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general.py +++ /dev/null @@ -1,432 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - depth_h = cv2.imread(filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 65535 * 1.4 + 0.5 - - depth_h[depth_h < near_bound+1e-3] = 0.0 - - depth = {} - for l in range(3): - depth[f"level_{l}"] = cv2.resize( - depth_h, - None, - fx=1.0 / (2**l), - fy=1.0 / (2**l), - interpolation=cv2.INTER_NEAREST, - ) - - if self.split == "train": - cutout = np.ones_like(depth[f"level_2"]) - h0 = int(np.random.randint(0, high=cutout.shape[0] // 5, size=1)) - h1 = int( - np.random.randint( - 4 * cutout.shape[0] // 5, high=cutout.shape[0], size=1 - ) - ) - w0 = int(np.random.randint(0, high=cutout.shape[1] // 5, size=1)) - w1 = int( - np.random.randint( - 4 * cutout.shape[1] // 5, high=cutout.shape[1], size=1 - ) - ) - cutout[h0:h1, w0:w1] = 0 - depth_aug = depth[f"level_2"] * cutout - else: - depth_aug = depth[f"level_2"].copy() - - return depth, depth_h, depth_aug - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 8 - # uid = 'c40d63d5d740405e91c7f5fce855076e' - # folder_id = '000-123' - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - src_views = range(8+idx*4, 8+(idx+1)*4) - - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
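The `__getitem__` above converts the stored pinhole z-depth (`view_*_depth_mm.png`, millimetres divided by 1000) into Euclidean distance along each ray before using it as supervision, by scaling the unit-z ray directions. A numpy-only sketch of that conversion with a fake depth map and hypothetical intrinsics:

```python
import numpy as np

# Convert a pinhole z-depth map (depth along the optical axis) into
# per-ray Euclidean distance, as the loader does before supervision.
H, W = 4, 4
fx = fy = 280.0                                    # hypothetical focal length
cx, cy = W / 2, H / 2
z_depth = np.full((H, W), 1.5, dtype=np.float32)   # fake constant z-depth

j, i = np.meshgrid(np.arange(H) + 0.5, np.arange(W) + 0.5, indexing="ij")
dirs = np.stack([(i - cx) / fx, (j - cy) / fy, np.ones_like(i)], axis=-1)

# distance along the ray = z-depth * ||direction||  (direction has unit z)
ray_distance = z_depth * np.linalg.norm(dirs, axis=-1)
assert (ray_distance >= z_depth).all()             # distance only grows off-axis
```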
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
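`cal_scale_mat()` returns `scale_mat = diag(r, r, r, 1)` with the scene center as translation and `scale_factor = 1/r`, so composing it into `P = intrinsic @ extrinsic @ scale_mat` re-expresses every camera in a frame where the estimated bounding sphere becomes the unit sphere; the depth maps are rescaled by the same factor. A small numeric check with a made-up center and radius:

```python
import numpy as np

# scale_mat maps normalized (unit-sphere) coordinates back to the original
# world frame; its inverse sends a point on the bounding sphere to the
# unit sphere.  Center and radius below are illustrative only.
center = np.array([0.2, -0.1, 0.3])
radius = 1.6
scale_mat = np.diag([radius, radius, radius, 1.0])
scale_mat[:3, 3] = center
scale_factor = 1.0 / radius

p_world = center + np.array([radius, 0.0, 0.0])    # point on the bounding sphere
p_norm = np.linalg.inv(scale_mat) @ np.append(p_world, 1.0)
print(np.linalg.norm(p_norm[:3]))                  # -> 1.0 (up to float error)

# Baking scale_mat into P = K @ w2c @ scale_mat is why the loader also
# multiplies the depth maps by scale_factor afterwards.
```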
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_12_narrow.py b/SparseNeuS_demo_v1/data/blender_general_12_narrow.py deleted file mode 100644 index bb1183fb695101bac1f8f33da9438a84378b3dca..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_12_narrow.py +++ /dev/null @@ -1,427 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 12 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow_8, 'r') as f: - narrow_8_meta = json.load(f) - - pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json" - with open(pose_json_path_narrow_4, 'r') as f: - narrow_4_meta = json.load(f) - - - self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_8_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, 
filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 11] - if idx < 8: - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - else: - # target view - c2w = self.c2ws[idx-8+40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4 + 4 + 4*4) - src_views_used = [] - skipped_idx = [40, 41, 42, 43] - for vid in src_views: - if vid in skipped_idx: - continue - - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - - # choose narrow - if cur_view_id < 8: - img_filename = 
os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - cur_view_id = cur_view_id - 1 - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py b/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py deleted file mode 100644 index 467dc5d4d1df3b6d3c8aa4384a1048bec9910973..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py +++ /dev/null @@ -1,427 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 8 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
- - - pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow_8, 'r') as f: - narrow_8_meta = json.load(f) - - pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json" - with open(pose_json_path_narrow_4, 'r') as f: - narrow_4_meta = json.load(f) - - - self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys()) # (8 + 8*4) + (4 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_8_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 11] - if idx < 8: - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - else: - # target view - c2w = self.c2ws[idx-8+40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4 + 4 + 4*4) - src_views_used = [] - skipped_idx = [40, 41, 42, 43] - for vid in src_views: - if vid in skipped_idx: - continue - - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - - # choose narrow - if cur_view_id < 8: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - cur_view_id = cur_view_id - 1 - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - 
w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
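As in the other loaders, the per-view near/far bounds are recomputed after rescaling from the camera's distance to the origin (dist - 1 and dist + 1, padded by 0.95/1.05), which assumes the normalized object fits inside the unit sphere. A minimal sketch with a hypothetical pose:

```python
import numpy as np

# Near/far bounds used after normalization: the object sits inside the
# unit sphere, so visible depths lie within dist-to-origin +/- 1.
def near_far_from_pose(c2w):
    dist = np.linalg.norm(c2w[:3, 3])        # camera center to scene origin
    near, far = dist - 1.0, dist + 1.0
    return 0.95 * near, 1.05 * far

c2w = np.eye(4)
c2w[:3, 3] = [0.0, 0.0, 1.6]                 # hypothetical normalized pose
print(near_far_from_pose(c2w))               # approximately (0.57, 2.73)
```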
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360.py b/SparseNeuS_demo_v1/data/blender_general_360.py deleted file mode 100644 index 37e8664613a614c03227375d8a0b25224d694bdc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360.py +++ /dev/null @@ -1,412 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_wide_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 36*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//36] - - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % 36 # [0, 35] - gt_view_idx = idx // 12 # [0, 2] - target_view_idx = idx % 12 # [0, 11] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt.png') - - depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - idx_of_12 = idx - 12 * gt_view_idx # idx % 12 - - src_views = list(i % 12 + 12 * gt_view_idx for i in range(idx_of_12 - 1-1, idx_of_12 + 2+1)) - - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{gt_view_idx}_{target_view_idx}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py b/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py deleted file mode 100644 index 72ad72bbfb336fa3e0d8b69f74c94afbea1593b7..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py +++ /dev/null @@ -1,406 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 6*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 24 # [0, 23] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt.png') - - depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(6+idx*4, 6+(idx+1)*4) - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py b/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py deleted file mode 100644 index 380706615bfe4a183b302f127af9913bfc2f4790..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py +++ /dev/null @@ -1,411 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0_0", "view_0_5", "view_1_7" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 6*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - # idx = idx % 24 # [0, 23] - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt.png') - - depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt_depth_mm.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(6+idx*4, 6+(idx+1)*4) - - for vid in src_views: - # if vid == idx: - # continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ 
w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - # print(scale_mat) - # print(scale_factor) - # ! calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("imgs: ", len(imgs)) - # print("img1 shape:", imgs[0].shape) - # print("img2 shape:", imgs[1].shape) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py deleted file mode 100644 index beb1f976907680936b20b37d76133589804d40c5..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py +++ /dev/null @@ -1,480 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 2 == 0: - valid_list = [0, 2, 4, 6] - else: - valid_list = [1, 3, 5, 7] - - if idx % 16 < 8: - idx = idx % 16 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ 
w2c_ref_inv) - - else: - idx = idx % 16 - 8 # [0, 7] - - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(40+8, 40+8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 8) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = 
torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py deleted file mode 100644 index e80567fe34ee51cb49355ee26ea8ce80dff706e6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py +++ /dev/null @@ -1,476 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (6 + 6*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - 
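# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The __init__ above converts every loaded pose from Blender's camera
# convention to OpenCV's by right-multiplying with a fixed axis-flip matrix
# (camera y and z negated), then obtains the world-to-camera matrix by
# inversion. A minimal stand-alone version of that step, fed a hypothetical
# identity pose, assuming the same convention as the loader:
import numpy as np

blender2opencv = np.array([[1,  0,  0, 0],
                           [0, -1,  0, 0],
                           [0,  0, -1, 0],
                           [0,  0,  0, 1]], dtype=np.float64)

def blender_pose_to_opencv(pose_blender):
    """pose_blender: 4x4 cam2world in Blender convention -> (c2w, w2c) in OpenCV."""
    c2w = pose_blender @ blender2opencv   # flip the y/z camera axes
    w2c = np.linalg.inv(c2w)              # world-to-camera extrinsic
    return c2w, w2c

c2w_example, w2c_example = blender_pose_to_opencv(np.eye(4))
# -----------------------------------------------------------------------------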
mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 12*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//12] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 12 < 8: - idx = idx % 12 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - if (vid // 4) % 2 != idx % 2: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - else: - idx = idx % 12 - 8 # [0, 5] - valid_list = [0, 2, 3, 5] - idx = valid_list[idx] # [0, 3] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - 
w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_0.png') - - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(gt_view_idx * 12, (gt_view_idx + 1) * 12) - - - src_views = range(40+6, 40+6+24) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 6) // 4 - if view_dix_to_use not in valid_list: - continue - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_5pred/", folder_id, uid, f'view_0_{idx}_{(vid-46) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % 12] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] 
= torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py b/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py deleted file mode 100644 index 248e9f9591b95a711406b0e1efb3568e05e2414a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py +++ /dev/null @@ -1,449 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - if self.split == 'train': - self.imgs_per_instance = 12 - else: - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, 
filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if self.split == 'train': - if idx == 4: - idx = 5 - elif idx == 5: - idx = 7 - elif idx == 10: - idx = 13 - elif idx == 11: - idx = 15 - - if idx % 16 < 8: # narrow image as target - idx = idx % 16 # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % 16 - 8 # [0, 5] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - 
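# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The target-view branch above turns the stored planar z-depth (a 16-bit PNG
# in millimetres) into a per-pixel *radial* distance from the camera centre:
# each pixel's unnormalised ray direction (x/fx, y/fy, 1) is scaled by its
# z-depth to give a camera-space surface point, and the point's norm is the
# distance. The loader builds the pixel grid with kornia's create_meshgrid;
# below is a numpy-only equivalent with hypothetical H, W, fx, fy values:
import numpy as np

def zdepth_to_distance(depth_z, fx, fy, cx=None, cy=None):
    """depth_z: (H, W) planar z-depth in metres -> (H, W) radial distance."""
    H, W = depth_z.shape
    cx = W / 2 if cx is None else cx
    cy = H / 2 if cy is None else cy
    # pixel-centre coordinates, matching the +0.5 offset used by the loader
    j, i = np.meshgrid(np.arange(H) + 0.5, np.arange(W) + 0.5, indexing='ij')
    dirs = np.stack([(i - cx) / fx, (j - cy) / fy, np.ones_like(i)], axis=-1)  # (H, W, 3), z = 1
    points = dirs * depth_z[..., None]        # camera-space surface points
    return np.linalg.norm(points, axis=-1)    # distance from the camera centre

dist_example = zdepth_to_distance(np.full((256, 256), 1.2), fx=355.6, fy=355.6)
# -----------------------------------------------------------------------------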
image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if origin_idx < 8 else False) - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py deleted file mode 100644 index e1fd371e5fc7be9685b81efa3d607018b2a9bdb1..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_2_stage.py +++ /dev/null @@ -1,396 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
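# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# load_K_Rt_from_P() above recovers intrinsics and a cam2world pose from a
# 3x4 projection matrix via cv2.decomposeProjectionMatrix: K comes back up to
# scale (hence the division by K[2, 2]), R is the world-to-camera rotation
# (hence R.T in the pose), and the translation is a homogeneous camera centre.
# A small round-trip check of that idea, using a hypothetical camera:
import cv2
import numpy as np

K = np.array([[350., 0., 128.],
              [0., 350., 128.],
              [0., 0., 1.]])
R = cv2.Rodrigues(np.array([0.1, -0.2, 0.3]).reshape(3, 1))[0]  # world-to-camera rotation
C = np.array([0.5, -0.3, 2.0])                                  # camera centre in world space
t = -R @ C
P = K @ np.hstack([R, t[:, None]])                              # 3x4 projection matrix

out = cv2.decomposeProjectionMatrix(P)
K_rec = out[0] / out[0][2, 2]
R_rec = out[1]
C_rec = (out[2][:3] / out[2][3])[:, 0]                          # de-homogenise the centre

c2w = np.eye(4)
c2w[:3, :3] = R_rec.T                                           # cam2world rotation
c2w[:3, 3] = C_rec                                              # cam2world translation
print(np.abs(K_rec - K).max(), np.abs(C_rec - C).max())         # both should be ~0
# -----------------------------------------------------------------------------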
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(8, 8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - 
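# --- Editor's sketch (illustrative, not part of the deleted file) ------------
# The loop above re-expresses every camera in a normalised object frame:
# scale_mat maps the unit sphere onto the scene's bounding sphere, so folding
# it into P = K @ w2c @ scale_mat and decomposing P again yields poses in a
# frame where the object lies inside ||x|| <= 1. Near/far then follow from the
# camera-to-origin distance with small safety margins. A sketch of that last
# step, with a hypothetical normalised cam2world pose:
import numpy as np

def near_far_from_pose(c2w_normalised, margin=0.05):
    """c2w in the unit-sphere frame -> conservative (near, far) bounds."""
    dist = np.linalg.norm(c2w_normalised[:3, 3])  # camera distance to the origin
    near, far = dist - 1.0, dist + 1.0            # object radius is 1 after scaling
    return (1.0 - margin) * near, (1.0 + margin) * far

c2w_demo = np.eye(4)
c2w_demo[:3, 3] = [0.0, 0.0, 2.2]                 # hypothetical camera 2.2 units away
print(near_far_from_pose(c2w_demo))               # approximately (1.14, 3.36)
# -----------------------------------------------------------------------------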
new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py b/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py deleted file mode 100644 index b1072d6a3e02f1908add474963aa6c6acaf69055..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_4_gt.py +++ /dev/null @@ -1,396 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(8, 8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 8) // 4 - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10_gt.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, 
factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py deleted file mode 100644 index fa97eb6ca99c254548e501f2e05d883f2b015e1c..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py +++ /dev/null @@ -1,446 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = 
cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 16 < 8: # narrow image as target - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 5] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if 
np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if origin_idx < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if origin_idx < 8 else False) - - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0,1,2] - local_idxs = [8+i*4+local_idx for local_idx in local_idxs] - src_views += local_idxs - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = 
torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py deleted file mode 100644 index 740bb81125a297fc1d504f4c119c7f9a76630507..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py +++ /dev/null @@ -1,439 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
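# A minimal, hedged sketch of the pose handling a few lines below: each c2w pose
# loaded from the *_pose.json files is in Blender camera convention and is
# right-multiplied by blender2opencv to flip the camera-frame y/z axes into OpenCV
# convention; the w2c extrinsic is then just its inverse. `example_pose` is a
# hypothetical 4x4 pose for illustration, not data from the original JSON files.
import numpy as np

blender2opencv = np.array([[1, 0, 0, 0],
                           [0, -1, 0, 0],
                           [0, 0, -1, 0],
                           [0, 0, 0, 1]], dtype=np.float64)

def blender_pose_to_opencv(pose_c2w_blender):
    """Return (c2w, w2c) in OpenCV convention from a Blender-convention c2w pose."""
    c2w = pose_c2w_blender @ blender2opencv   # flip the camera-frame y and z axes
    w2c = np.linalg.inv(c2w)                  # world-to-camera extrinsic
    return c2w, w2c

example_pose = np.eye(4)        # identity rotation
example_pose[2, 3] = 1.5        # camera placed 1.5 units along +z (illustration only)
c2w, w2c = blender_pose_to_opencv(example_pose)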
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % 16 < 8: # gt image as target - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 7] - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - if_use_narrow = [] - if self.split == 'train': - for i in range(8): - if np.random.random() > 0.5: - if_use_narrow.append(True) # use narrow - else: - if_use_narrow.append(False) # 2-stage prediction - if_use_narrow[origin_idx % 8] = True if (origin_idx % 16) < 8 else False - else: - for i in range(8): - if_use_narrow.append( True if (origin_idx % 16) < 8 else False) - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - cur_view_id = (vid - 8) // 4 # [0, 7] - # choose narrow - if if_use_narrow[cur_view_id]: - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png') - else: # choose 2-stage - img_filename = 
os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{cur_view_id}_{(vid) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py b/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py deleted file mode 100644 index 6d860e521935b529c4240a0299d892ff90f683b2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py +++ /dev/null @@ -1,470 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - self.imgs_per_instance = 16 - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
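# A minimal, hedged sketch of the RGBA-to-RGB step used in __getitem__ below:
# after T.ToTensor(), 4-channel view PNGs are alpha-composited onto a white
# background ( img[:3] * img[-1:] + (1 - img[-1:]) ). `rgba` here is a
# hypothetical (4, H, W) tensor in [0, 1] standing in for a loaded image.
import torch

def composite_on_white(rgba):
    """Blend a (4, H, W) RGBA tensor over white, returning a (3, H, W) RGB tensor."""
    rgb, alpha = rgba[:3], rgba[-1:]
    return rgb * alpha + (1.0 - alpha)   # pixels with alpha == 0 become white

rgba = torch.rand(4, 256, 256)           # stand-in for a transformed PNG
rgb = composite_on_white(rgba)           # shape: (3, 256, 256)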
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - if idx % self.imgs_per_instance < 8: - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - else: - idx = idx % self.imgs_per_instance - 8 # [0, 5] - - c2w = self.c2ws[idx + 40] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png') - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - # depth_h = torch.fill((img.shape[1], img.shape[2]), -1.0) - # 
print("depth_h", depth_h.shape) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(40+8, 40+8+32) - src_views_used = [] - for vid in src_views: - view_dix_to_use = (vid - 40 - 8) // 4 - - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - # print("img shape1: ", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img shape2: ", img.shape) - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - # print("img numeber: ", len(imgs)) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - if view_ids[0] < 8: - meta_end = "_narrow"+ "_refview" + str(view_ids[0]) - else: - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py deleted file mode 100644 index 9609f20a733486544347d7fec78ae16bf1b9e2a3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.imgs_per_instance = 8 - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
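# A minimal, hedged sketch of the target-view depth handling in __getitem__ below:
# the view_*_depth_mm.png appears to store z-depth in millimetres, which the loader
# divides by 1000 and converts to per-pixel Euclidean distance along the camera ray
# via get_ray_directions() and a norm. `depth_mm`, `fx` and `fy` are hypothetical
# stand-ins for the real depth file and intrinsics.
import numpy as np
import torch
from kornia import create_meshgrid

def zdepth_mm_to_ray_distance(depth_mm, fx, fy):
    H, W = depth_mm.shape
    depth_m = depth_mm.astype(np.float32) / 1000.0                       # mm -> m
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    i, j = grid.unbind(-1)
    dirs = torch.stack([(i - W / 2) / fx, (j - H / 2) / fy, torch.ones_like(i)], -1)
    surface = dirs.numpy() * depth_m[..., None]                          # (H, W, 3)
    return np.linalg.norm(surface, axis=-1)                              # (H, W)

dist = zdepth_mm_to_ray_distance(np.full((256, 256), 1200, np.uint16), 280.0, 280.0)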
- - - pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path_narrow, 'r') as f: - narrow_meta = json.load(f) - - pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json" - with open(pose_json_path_two_stage, 'r') as f: - two_stage_meta = json.load(f) - - - self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 8*4) - self.img_wh = (256, 256) - self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())) - intrinsic = np.eye(4) - assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal" - intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"]) - self.intrinsic = intrinsic - assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal" - self.near_far = np.array(narrow_meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - - - - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return self.imgs_per_instance * len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - idx_original=idx - - folder_uid_dict = self.lvis_paths[idx//self.imgs_per_instance] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - idx = idx % self.imgs_per_instance # [0, 7] - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - - src_views = range(0, 8) - src_views_used = [] - for vid in src_views: - src_views_used.append(vid) - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{vid}_0.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - depth_h =torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_depths_h.append(depth * scale_factor) - - - imgs = 
torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx_original % self.imgs_per_instance] + src_views_used - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8) - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py b/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py deleted file mode 100644 index bacd68d0d8cc7b578bf546e4484590f985920051..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py +++ /dev/null @@ -1,418 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - # return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - # idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - # src_views = range(8, 8 + 8 * 4) - src_views = range(8+idx*4, 8+(idx+1)*4) - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
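Every loader in this diff treats the target view as the reference frame: each world2cam matrix is right-multiplied by the inverse of the reference w2c, so the reference camera lands on the identity, and Blender/OpenGL poses are first converted to the OpenCV convention by flipping the y and z axes. A small numpy sketch of those two conventions (the toy poses are invented for illustration):

import numpy as np

# Blender-style camera -> OpenCV camera: flip y and z, as in the loaders above.
blender2opencv = np.diag([1.0, -1.0, -1.0, 1.0])

def make_relative(w2cs, ref_idx=0):
    """Re-express every world2cam matrix in the reference camera's frame."""
    w2c_ref_inv = np.linalg.inv(w2cs[ref_idx])
    return [w2c @ w2c_ref_inv for w2c in w2cs]

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    c2ws = []
    for _ in range(3):
        c2w = np.eye(4)
        c2w[:3, 3] = rng.normal(size=3)     # toy translations, identity rotations
        c2ws.append(c2w @ blender2opencv)
    w2cs = [np.linalg.inv(c2w) for c2w in c2ws]
    rel = make_relative(w2cs, ref_idx=0)
    print(np.allclose(rel[0], np.eye(4)))   # True: reference camera becomes the origin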
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
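The loop above folds the scene-normalising scale_mat into every camera by forming P = intrinsic @ w2c @ scale_mat, re-decomposing P to get the pose in unit-sphere coordinates, and then deriving the per-view affine matrix and a near/far bracket from the camera's distance to the origin (the object now fits a unit sphere, so near/far is roughly dist - 1 and dist + 1 with 5% margins). A condensed sketch of that per-camera step; the function name is illustrative, not part of the repository:

import numpy as np
import cv2

def normalize_camera(intrinsic, w2c, scale_mat):
    """Fold a 4x4 scale_mat into one camera (4x4 intrinsic/extrinsic) and rebuild its pose."""
    P = (intrinsic @ w2c @ scale_mat)[:3, :4]
    K, R, t = cv2.decomposeProjectionMatrix(P)[:3]
    K = K / K[2, 2]
    c2w = np.eye(4, dtype=np.float32)
    c2w[:3, :3] = R.transpose()
    c2w[:3, 3] = (t[:3] / t[3])[:, 0]
    w2c_new = np.linalg.inv(c2w)

    affine_mat = np.eye(4)
    affine_mat[:3, :4] = K @ w2c_new[:3, :4]   # projection used for feature warping

    dist = np.linalg.norm(c2w[:3, 3])          # camera distance in unit-sphere space
    near_far = [0.95 * (dist - 1.0), 1.05 * (dist + 1.0)]
    return w2c_new, c2w, affine_mat, near_far

The companion scale_factor (1 / radius) is what the loaders multiply the depth maps by, so metric depth stays consistent with the rescaled poses.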
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_6.py b/SparseNeuS_demo_v1/data/blender_general_narrow_6.py deleted file mode 100644 index 5d8333986bb15b3e3fd495f1ee4600e22ef93246..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_6.py +++ /dev/null @@ -1,399 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - if self.split == 'train': - return 6*len(self.lvis_paths) - else: - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - if self.split == 'train': - folder_uid_dict = self.lvis_paths[idx//6] - idx = idx % 6 # [0, 5] - if idx == 4: - idx = 5 - elif idx == 5: - idx = 7 - else: - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - # print("len(imges)", len(imgs)) - - # ! 
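The depth PNGs store z-depth in millimetres; the block above turns them into Euclidean distance along each ray by scaling the unit-z camera-space directions by the z-depth and taking the norm, which is the quantity the ray sampler expects. A standalone version of that conversion (the focal lengths and file path are placeholders):

import cv2
import numpy as np
import torch
from kornia import create_meshgrid

def zdepth_to_distance(depth_mm_png, fx, fy, cx=None, cy=None):
    """Convert a millimetre z-depth map to per-pixel distance from the camera centre."""
    depth_z = cv2.imread(depth_mm_png, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0
    H, W = depth_z.shape
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5   # (H, W, 2)
    i, j = grid.unbind(-1)
    cx = W / 2 if cx is None else cx
    cy = H / 2 if cy is None else cy
    # camera-space ray directions with unit z, mirroring get_ray_directions()
    dirs = torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], -1)
    points = dirs.numpy() * depth_z[..., None]      # 3D surface point per pixel
    distance = np.linalg.norm(points, axis=-1)      # length of the ray to that point
    return distance, depth_z > 0                    # distance map and validity mask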
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py deleted file mode 100644 index 58c26348e73b44fdcb33bad81b1fddba66efeffc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py +++ /dev/null @@ -1,393 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - # local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
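In this 8_3_fixed variant each of the 8 anchor views contributes 3 of its 4 predicted neighbours, chosen with the fixed offsets [0, 2, 3]; the flat index vid is later mapped back to (anchor, prediction) when building the view_{a}_{b}_10.png filename. A small sketch of that indexing:

def src_view_ids(local_idxs=(0, 2, 3), n_anchors=8, preds_per_anchor=4):
    # ids 0..7 are the anchor views themselves; predicted views start at 8, anchor-major
    ids = []
    for anchor in range(n_anchors):
        ids += [8 + anchor * preds_per_anchor + k for k in local_idxs]
    return ids

def src_view_filename(vid, preds_per_anchor=4):
    anchor = (vid - 8) // preds_per_anchor
    pred = vid % preds_per_anchor
    return f"view_{anchor}_{pred}_10.png"

if __name__ == "__main__":
    ids = src_view_ids()
    print(len(ids))                   # 24 = 8 anchors x 3 predictions
    print(src_view_filename(ids[0]))  # view_0_0_10.png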
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py deleted file mode 100644 index b52542595e8d39dff91f18e63a0b504c4c4d2d48..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = list() - for i in range(8): - - if self.split == 'train': - local_idxs = np.random.choice(4, 3, replace=False) - else: - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
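The _random variant differs from the fixed one only in how those 3-of-4 predictions are picked: randomly without replacement at train time, and with the deterministic [0, 2, 3] everywhere else. A compact sketch of that choice:

import numpy as np

def pick_local_idxs(split):
    """3-of-4 prediction offsets per anchor view: random for training, fixed otherwise."""
    if split == "train":
        return np.random.choice(4, 3, replace=False).tolist()
    return [0, 2, 3]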
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py b/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py deleted file mode 100644 index e120367ce96847e9fb60b2ae038a812583fe75e3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py +++ /dev/null @@ -1,432 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
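Each PNG is read as RGBA and alpha-composited onto a white background before being stacked, via rgb * alpha + (1 - alpha). A minimal sketch of that blend for one image (the path is a placeholder):

import torch
from PIL import Image
from torchvision import transforms as T

to_tensor = T.Compose([T.ToTensor()])

def load_rgb_on_white(path):
    """Load a (possibly RGBA) image and composite it over a white background."""
    img = to_tensor(Image.open(path))      # (3 or 4, H, W), values in [0, 1]
    if img.shape[0] == 4:
        rgb, alpha = img[:3], img[-1:]
        img = rgb * alpha + (1.0 - alpha)  # transparent pixels become white
    return img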
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - if self.split == 'train': - # randomly select one view from eight views as reference view - idx_to_select = np.random.randint(0, 8) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}.png') - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs[0] = img - - w2c_selected = self.all_extrinsics[idx_to_select] @ w2c_ref_inv - P = self.all_intrinsics[idx_to_select] @ w2c_selected @ scale_mat - P = P[:3, :4] - - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = self.all_intrinsics[idx_to_select][:3, :3] @ w2c[:3, :4] - new_affine_mats[0] = affine_mat - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - new_near_fars[0] = [0.95 * near, 1.05 * far] - - new_w2cs[0] = w2c - new_c2ws[0] = c2w - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx_to_select}_depth_mm.png')) - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance * scale_factor - - new_depths_h[0] = depth_h - masks_h[0] = mask_h - - - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all.py deleted file mode 100644 index 50b85d133707e83b36d926b7acf1cb121dd4d04d..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all.py +++ /dev/null @@ -1,386 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py deleted file mode 100644 index 1b832beccd85c8a0be98edf95f0d244c1cbf8b17..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py +++ /dev/null @@ -1,410 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py deleted file mode 100644 index 5c2dbebd00ed9e0293c26029c97ab77b7880fcf0..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py +++ /dev/null @@ -1,411 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 10 - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx * 8 - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - # print("img_pre", img.shape) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - # print("img", img.shape) - imgs += [img] - - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - # print("depth_h", depth_h.shape) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py index 194cf007f54d2d377ce6561050f82e38dc246e73..530a434828d4fdb1c4d2439ea9fbdcc40d449ef6 100644 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py +++ b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py @@ -1,6 +1,6 @@ from torch.utils.data import Dataset -from utils.misc_utils import read_pfm import os +import json import numpy as np import cv2 from PIL import Image @@ -9,12 +9,7 @@ from torchvision import transforms as T from data.scene import get_boundingbox from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio from kornia import create_meshgrid -import open3d as o3d - def get_ray_directions(H, W, focal, center=None): """ @@ -73,10 +68,6 @@ class BlenderPerView(Dataset): # print("root_dir: ", root_dir) self.root_dir = root_dir self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' self.specific_dataset_name = specific_dataset_name self.n_views = n_views @@ -102,8 +93,6 @@ class BlenderPerView(Dataset): for shape_name in self.shape_list: self.lvis_paths.append(os.path.join(main_folder, shape_name)) - # print("lvis_paths: ", 
self.lvis_paths) - if img_wh is not None: assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ 'img_wh must both be multiples of 32!' @@ -130,9 +119,6 @@ class BlenderPerView(Dataset): self.all_extrinsics.append(extrinsic) self.all_near_fars.append(near_far) - def read_depth(self, filename): - pass - def read_mask(self, filename): mask_h = cv2.imread(filename, 0) mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, @@ -160,11 +146,6 @@ class BlenderPerView(Dataset): # return 8*len(self.lvis_paths) return len(self.lvis_paths) - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - def __getitem__(self, idx): sample = {} idx = idx * 8 # to be deleted @@ -198,9 +179,8 @@ class BlenderPerView(Dataset): self.c2ws = [] self.w2cs = [] self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] + for image_idx, img_id in enumerate(self.img_ids): + pose = self.input_poses[image_idx] c2w = pose @ self.blender2opencv self.c2ws.append(c2w) self.w2cs.append(np.linalg.inv(c2w)) @@ -224,7 +204,6 @@ class BlenderPerView(Dataset): w2cs.append(w2c @ w2c_ref_inv) c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') img = Image.open(img_filename) @@ -258,7 +237,6 @@ class BlenderPerView(Dataset): for vid in src_views: - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') img = Image.open(img_filename) img_wh = self.img_wh @@ -312,7 +290,6 @@ class BlenderPerView(Dataset): new_near_fars.append([0.95 * near, 1.05 * far]) new_depths_h.append(depth * scale_factor) - # print(new_near_fars) imgs = torch.stack(imgs).float() depths_h = np.stack(new_depths_h) masks_h = np.stack(masks_h) @@ -360,7 +337,6 @@ class BlenderPerView(Dataset): sample['view_ids'] = torch.from_numpy(np.array(view_ids)) sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - # sample['light_idx'] = torch.tensor(light_idx) sample['scan'] = shape_name sample['scale_factor'] = torch.tensor(scale_factor) diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py deleted file mode 100644 index 7ce059be019a360b193c526c358057ffc9b48d1a..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py +++ /dev/null @@ -1,414 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - self.specific_dataset_name = 'Objaverse' - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
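# ----------------------------------------------------------------------------
# Editor's note (illustrative aside, not part of the original file or diff):
# every loader deleted in this patch re-derives camera poses by composing
# P = intrinsic @ extrinsic @ scale_mat and passing P to load_K_Rt_from_P(),
# whose "? why need transpose here" comment marks a recurring point of
# confusion. Below is a minimal, self-contained sketch of that decomposition;
# the intrinsics and pose values are hypothetical, chosen only for the demo.
import cv2
import numpy as np

K = np.array([[280.0,   0.0, 128.0],
              [  0.0, 280.0, 128.0],
              [  0.0,   0.0,   1.0]])            # hypothetical pinhole intrinsics
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 1.5]                     # hypothetical world-to-camera pose
P = K @ w2c[:3, :4]                              # 3x4 projection matrix

# cv2.decomposeProjectionMatrix returns the world-to-camera rotation R and the
# camera centre in homogeneous coordinates, so the cam2world pose is rebuilt
# from R.T (hence the transpose in load_K_Rt_from_P) and t[:3] / t[3].
K_out, R, t = cv2.decomposeProjectionMatrix(P)[:3]
K_out = K_out / K_out[2, 2]                      # normalise so K_out[2, 2] == 1
c2w = np.eye(4, dtype=np.float32)
c2w[:3, :3] = R.transpose()                      # world2cam rotation -> cam2world
c2w[:3, 3] = (t[:3] / t[3])[:, 0]                # camera centre in world coords
# ----------------------------------------------------------------------------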
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - # print(self.img_ids) - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if vid % 4 == 0: - vid = (vid - 8) // 4 - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}') - else: - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py deleted file mode 100644 index f69ece26bdd88955bf5612f2f6f66ae7f9262e19..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py +++ /dev/null @@ -1,465 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - -def calc_pose(phis, thetas, size, radius = 1.2): - import torch - def normalize(vectors): - return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) - # device = torch.device('cuda') - thetas = torch.FloatTensor(thetas) - phis = torch.FloatTensor(phis) - - centers = torch.stack([ - radius * torch.sin(thetas) * torch.sin(phis), - -radius * torch.cos(thetas) * torch.sin(phis), - radius * torch.cos(phis), - ], dim=-1) # [B, 3] - - # lookat - forward_vector = normalize(centers).squeeze(0) - up_vector = torch.FloatTensor([0, 0, 
1]).unsqueeze(0).repeat(size, 1) - right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1)) - if right_vector.pow(2).sum() < 0.01: - right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1) - up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1)) - - poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1) - poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1) - poses[:, :3, 3] = centers - return poses - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - - with open(pose_json_path, 'r') as f: - meta = json.load(f) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid in range(self.input_poses.shape[0]): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - # pose_json_path = os.path.join(folder_path, "pose.json") - # with open(pose_json_path, 'r') as f: - # meta = json.load(f) - - # self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - # self.img_wh = (256, 256) - # self.input_poses = np.array(list(meta["c2ws"].values())) - # intrinsic = np.eye(4) - # intrinsic[:3, :3] = np.array(meta["intrinsics"]) - # self.intrinsic = intrinsic - # self.near_far = np.array(meta["near_far"]) - # self.near_far[1] = 1.8 - # self.define_transforms() - # self.blender2opencv = np.array( - # [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - # ) - - pose_file = os.path.join(folder_path, '32_random', 'views.npz') - pose_array = np.load(pose_file) - pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws - - self.img_wh = (256, 256) - self.input_poses = np.array(pose) - self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1) - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix in range(pose.shape[0]): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, '32_random', f'{idx}.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(0, 8 * 4) - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, '32_random', f'{vid}.png') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if 
img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py deleted file mode 100644 index 6263a9ff47edc8f7b65600786c244fafb809240b..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py +++ /dev/null @@ -1,419 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
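# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# The loaders in this diff all convert a scaled projection matrix back into intrinsics and a
# cam2world pose via load_K_Rt_from_P (defined above). The round trip below, with made-up
# numbers, shows why transposing R and de-homogenizing t in that helper yields a
# camera-to-world matrix rather than world-to-camera.
import cv2
import numpy as np

K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
R = cv2.Rodrigues(np.array([0.1, -0.2, 0.05]))[0]          # small world-to-camera rotation
t = np.array([[0.3], [-0.1], [1.5]])
P = K @ np.hstack([R, t])                                   # (3, 4) projection matrix

K_out, R_out, t_out = cv2.decomposeProjectionMatrix(P)[:3]
K_out = K_out / K_out[2, 2]                                 # normalize so K[2, 2] == 1

pose = np.eye(4, dtype=np.float32)
pose[:3, :3] = R_out.transpose()                            # R_out is world-to-camera, so transpose
pose[:3, 3] = (t_out[:3] / t_out[3])[:, 0]                  # homogeneous camera centre

assert np.allclose(K_out, K, atol=1e-4)
assert np.allclose(np.linalg.inv(pose)[:3, :3], R, atol=1e-4)   # inverting the pose recovers R
# --- end editor's note ---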
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if (vid // 4) % 2 != 0: - continue - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py deleted file mode 100644 index c88c0d9b37402f970d9b2d7686b774943366e9a8..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py +++ /dev/null @@ -1,420 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
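# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# get_ray_directions (defined above) maps each pixel to a ray direction in the camera frame:
# x = (i - cx) / fx, y = (j - cy) / fy, z = 1. The tiny example below uses made-up focal
# lengths and a plain torch.meshgrid instead of kornia's create_meshgrid; it shows that the
# pixel nearest the principal point looks (almost) straight down the +z camera axis.
import torch

H, W = 4, 4
focal = (280.0, 280.0)                      # (fx, fy), made-up values
cent = (W / 2, H / 2)

j, i = torch.meshgrid(torch.arange(H, dtype=torch.float32),
                      torch.arange(W, dtype=torch.float32), indexing="ij")
i, j = i + 0.5, j + 0.5                     # the +0.5 matches the grid offset used above

directions = torch.stack([(i - cent[0]) / focal[0],
                          (j - cent[1]) / focal[1],
                          torch.ones_like(i)], dim=-1)      # (H, W, 3)

print(directions[H // 2, W // 2])           # ~[0.0018, 0.0018, 1.0]
# --- end editor's note ---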
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6): - continue - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py deleted file mode 100644 index 512c3db02edc8e68208167b7d1715f1f67025cdf..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py +++ /dev/null @@ -1,428 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
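# --- Editor's note (hedged, illustrative only; not part of the deleted file or of this patch) ---
# cal_scale_mat (defined above) builds scale_mat = diag(r, r, r, 1) with the bounding-sphere
# centre in its last column, i.e. x_world = r * x_normalized + centre. Applying its inverse to
# a camera centre (roughly what composing P with scale_mat and re-decomposing achieves in
# __getitem__) places the camera in a frame where the object fits the unit sphere; the per-view
# near/far of dist -/+ 1 then brackets that sphere. All numbers below are made up.
import numpy as np

centre = np.array([0.1, -0.2, 0.05])        # hypothetical bounding-sphere centre
radius = 0.8 * 1.1                          # hypothetical radius, inflated by factor=1.1

scale_mat = np.diag([radius, radius, radius, 1.0]).astype(np.float32)
scale_mat[:3, 3] = centre

cam_world = np.array([0.1, -0.2, 1.55, 1.0])            # hypothetical camera centre (homogeneous)
cam_norm = np.linalg.inv(scale_mat) @ cam_world          # x_normalized = (x_world - centre) / r
dist = np.linalg.norm(cam_norm[:3])

near, far = 0.95 * (dist - 1), 1.05 * (dist + 1)         # same slab as the deleted loaders use
print(dist, near, far)                                    # ~1.70, ~0.67, ~2.84
# --- end editor's note ---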
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - # src_views = range(8, 8 + 8 * 4) - - src_views = list() - for i in range(8): - # randomly choose 3 different number from [0,3] - # local_idxs = np.random.choice(4, 3, replace=False) - local_idxs = [0, 2, 3] - # local_idxs = np.random.choice(4, 3, replace=False) - - local_idxs = [8 + i * 4 + local_idx for local_idx in local_idxs] - src_views += local_idxs - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py deleted file mode 100644 index 3c1a23183a388175c2212bf552fb15ae385737ab..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py +++ /dev/null @@ -1,420 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - # self.specific_dataset_name = 'Zero123' - - self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ['barrel_render'] - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
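# --- Editor's illustrative sketch (not part of the deleted file above) --------
# load_K_Rt_from_P() recovers intrinsics and a cam2world pose from a 3x4
# projection matrix via cv2.decomposeProjectionMatrix; __getitem__ later feeds it
# P = intrinsic @ extrinsic @ scale_mat. The self-contained round trip below
# composes K @ w2c for a synthetic camera and checks that the decomposition
# returns the original camera. The look-at helper and the focal/principal-point
# numbers are hypothetical, chosen only to make the sketch runnable.
import cv2
import numpy as np

def _look_at_origin(eye):
    """Cam2world matrix whose +z axis points from `eye` toward the origin (assumed convention)."""
    eye = np.asarray(eye, dtype=np.float32)
    forward = -eye / np.linalg.norm(eye)
    right = np.cross(np.array([0., 0., 1.], dtype=np.float32), forward)
    right /= np.linalg.norm(right)
    down = np.cross(forward, right)
    c2w = np.eye(4, dtype=np.float32)
    c2w[:3, 0], c2w[:3, 1], c2w[:3, 2], c2w[:3, 3] = right, down, forward, eye
    return c2w

K = np.eye(4, dtype=np.float32)
K[0, 0] = K[1, 1] = 280.0          # focal length in pixels (hypothetical)
K[0, 2] = K[1, 2] = 128.0          # principal point of a 256x256 image

c2w = _look_at_origin([0.8, -0.9, 0.6])
P = (K @ np.linalg.inv(c2w))[:3, :4]        # 3x4 projection matrix, as built in the loader

out = cv2.decomposeProjectionMatrix(P)
K_rec, R, t = out[0], out[1], out[2]
K_rec = K_rec / K_rec[2, 2]
c2w_rec = np.eye(4, dtype=np.float32)
c2w_rec[:3, :3] = R.transpose()             # R is the world2cam rotation, so transpose it
c2w_rec[:3, 3] = (t[:3] / t[3])[:, 0]       # homogeneous camera centre -> c2w translation

assert np.allclose(K_rec[:3, :3], K[:3, :3], atol=1e-3)
assert np.allclose(c2w_rec, c2w, atol=1e-3)
# ------------------------------------------------------------------------------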
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join(folder_path, "pose.json") - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8) - - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - # img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py deleted file mode 100644 index 4b2c7f6b2306cca93f476c2c233956e4cff0dcfb..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py +++ /dev/null @@ -1,417 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d - - -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[], - specific_dataset_name = 'GSO' - ): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - # self.specific_dataset_name = 'Realfusion' - # self.specific_dataset_name = 'GSO' - # self.specific_dataset_name = 'Objaverse' - self.specific_dataset_name = 'Objaverse_archived' - - # self.specific_dataset_name = specific_dataset_name - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh' - # find all subfolders - main_folder = os.path.join(root_dir, self.specific_dataset_name) - self.shape_list = os.listdir(main_folder) - self.shape_list.sort() - - # self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED - - - self.lvis_paths = [] - for shape_name in self.shape_list: - self.lvis_paths.append(os.path.join(main_folder, shape_name)) - - # print("lvis_paths: ", self.lvis_paths) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' 
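# --- Editor's illustrative sketch (not part of the deleted file above) --------
# get_ray_directions() defined above returns pinhole ray directions with unit z,
# and the depth-supervised loader variants multiply them by a z-depth map to get
# camera-space surface points and Euclidean distances. The dependency-free sketch
# below re-implements the pixel grid with torch.meshgrid instead of
# kornia.create_meshgrid (a simplification, not the original helper); the focal
# length, principal point, and depth value are hypothetical.
import torch

def ray_directions(H, W, fx, fy, cx, cy):
    # Pixel-centre grid; the z component is 1, so multiplying by z-depth is valid.
    j, i = torch.meshgrid(torch.arange(H, dtype=torch.float32) + 0.5,
                          torch.arange(W, dtype=torch.float32) + 0.5,
                          indexing='ij')
    return torch.stack([(i - cx) / fx, (j - cy) / fy, torch.ones_like(i)], dim=-1)  # (H, W, 3)

H = W = 256
fx = fy = 280.0            # hypothetical focal length in pixels
cx = cy = 128.0            # principal point
dirs = ray_directions(H, W, fx, fy, cx, cy)

depth_z = torch.full((H, W), 1.5)             # synthetic z-depth map in scene units
points_cam = dirs * depth_z[..., None]        # (H, W, 3) camera-space surface points
distance = points_cam.norm(dim=-1)            # Euclidean depth, as in the commented-out loader code

# Near the principal point the ray is almost (0, 0, 1), so distance ~= z-depth there.
assert torch.allclose(distance[128, 128], torch.tensor(1.5), atol=1e-4)
# ------------------------------------------------------------------------------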
- - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - # return 8*len(self.lvis_paths) - return len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - idx = idx * 8 # to be deleted - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views - - folder_path = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - - # last subdir name - shape_name = os.path.split(folder_path)[-1] - - pose_json_path = os.path.join('/objaverse-processed/zero12345_img/zero12345_narrow_pose.json') - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix, img_id in enumerate(self.img_ids): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}') - img_filename = os.path.join(folder_path, 'stage1_8', f'{idx}.png') - - img = Image.open(img_filename) - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32) - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - - # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}') - img_filename = os.path.join(folder_path, 'stage2_8', f'{(vid-8)//4}_{(vid-8)%4}.png') - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - - - target_w2cs = [] - target_intrinsics = [] - new_target_w2cs = [] - for i_idx in range(8): - target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv) - target_intrinsics.append(self.all_intrinsics[i_idx]) - - for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_target_w2cs.append(w2c) - target_w2cs = np.stack(new_target_w2cs) - - - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! 
in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = shape_name - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0]) - # print("meta: ", sample['meta']) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py deleted file mode 100644 index 33a4ecf7de541049e3b89cc98f74106b59d418c7..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py +++ /dev/null @@ -1,388 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. 
- Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. 
"view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - # directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - # surface_points = directions * depth_h[..., None] # [H, W, 3] - # distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - # depth_h = distance - - depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32) - depth_h = depth_h.fill_(-1.0) - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py deleted file mode 100644 index f811326da45563ae870350f78ccdbe358411f3b6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py +++ /dev/null @@ -1,389 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 4*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx * 2 - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(8, 8 + 8 * 4) - - for vid in src_views: - if (vid // 4) % 2 != 0: - continue - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - # print("len(imgs)", len(imgs)) - # ! 
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py b/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py deleted file mode 100644 index 76b9fccad69f6929e086074b55807ef5a0a17eee..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py +++ /dev/null @@ -1,395 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! 
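# load_K_Rt_from_P() above splits a 3x4 projection matrix P = K [R | t] back into
# intrinsics and a camera-to-world pose via cv2.decomposeProjectionMatrix. A
# minimal round-trip sketch of its usage, with a made-up camera:
import numpy as np
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])                 # hypothetical intrinsics
w2c = np.eye(4)
w2c[:3, 3] = [0.0, 0.0, 1.5]                    # hypothetical world-to-camera transform
P = (K @ w2c[:3, :4]).astype(np.float32)
intrinsics, c2w = load_K_Rt_from_P(None, P)     # c2w is (approximately) np.linalg.inv(w2c)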
load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - with open(pose_json_path, 'r') as f: - meta = json.load(f) - - self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10" - self.img_wh = (256, 256) - self.input_poses = np.array(list(meta["c2ws"].values())) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for idx, img_id in enumerate(self.img_ids): - pose = self.input_poses[idx] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid, img_id in enumerate(self.img_ids): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, 
bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 8*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - idx = idx - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//8] - idx = idx % 8 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png') - - depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - # print("valid pixels", np.sum(mask_h)) - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - - src_views = range(8, 8 + 8 * 4) - - vid_list = [] - for vid in src_views: - if (vid // 4) % 2 != idx % 2: - continue - vid_list.append(vid) - img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # print("idx:", idx) - # print("len(imgs)", len(imgs)) - # print("vid_list", vid_list) - # ! 
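# The depth maps above are stored as z-depth in millimetres (hence the /1000);
# the code converts them to distance along the ray by multiplying with the
# un-normalised ray directions (whose z component is 1) and taking the norm.
# Minimal per-pixel sketch with a made-up focal length and pixel:
import numpy as np
fx = fy = 280.0                                  # hypothetical focal length in pixels
cx = cy = 128.0                                  # principal point of a 256x256 image
u, v = 200.0, 64.0                               # hypothetical pixel
direction = np.array([(u - cx) / fx, (v - cy) / fy, 1.0])
z_depth = 1.3                                    # hypothetical z-depth in metres
point_cam = direction * z_depth                  # camera-space surface point
ray_distance = np.linalg.norm(point_cam)         # >= z_depth; equal only at the principal point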
estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
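# The affine_mats assembled above are 4x4 projection matrices K @ [R | t] padded
# with an identity row, so 3D sample points can be projected into each view with a
# single matrix multiply. Minimal sketch projecting one world point (all numbers
# made up):
import numpy as np
K = np.array([[280.0, 0.0, 128.0],
              [0.0, 280.0, 128.0],
              [0.0, 0.0, 1.0]])
w2c = np.eye(4)
w2c[2, 3] = 1.5                                  # hypothetical camera 1.5 units from the origin
affine_mat = np.eye(4)
affine_mat[:3, :4] = K @ w2c[:3, :4]
X = np.array([0.1, -0.2, 0.0, 1.0])              # homogeneous world point
x = affine_mat @ X
u, v = x[0] / x[2], x[1] / x[2]                  # pixel coordinates after the perspective divide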
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/blender_gt_32.py b/SparseNeuS_demo_v1/data/blender_gt_32.py deleted file mode 100644 index 9ec6f0075febfcd46061e61ae10cd68b05dfb5fc..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/blender_gt_32.py +++ /dev/null @@ -1,419 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image -import json -from termcolor import colored -import imageio -from kornia import create_meshgrid -import open3d as o3d -def get_ray_directions(H, W, focal, center=None): - """ - Get ray directions for all pixels in camera coordinate. - Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/ - ray-tracing-generating-camera-rays/standard-coordinate-systems - Inputs: - H, W, focal: image height, width and focal length - Outputs: - directions: (H, W, 3), the direction of the rays in camera coordinate - """ - grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5 # 1xHxWx2 - - i, j = grid.unbind(-1) - # the direction here is without +0.5 pixel centering as calibration is not so accurate - # see https://github.com/bmild/nerf/issues/24 - cent = center if center is not None else [W / 2, H / 2] - directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1) # (H, W, 3) - - return directions - -import os, json -import numpy as np -def calc_pose(phis, thetas, size, radius = 1.2): - import torch - def normalize(vectors): - return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) - # device = torch.device('cuda') - thetas = torch.FloatTensor(thetas) - phis = torch.FloatTensor(phis) - - centers = torch.stack([ - radius * torch.sin(thetas) * torch.sin(phis), - -radius * torch.cos(thetas) * torch.sin(phis), - radius * torch.cos(phis), - ], dim=-1) # [B, 3] - - # lookat - forward_vector = normalize(centers).squeeze(0) - up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1) - right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1)) - if right_vector.pow(2).sum() < 0.01: - right_vector = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1) - up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1)) - - poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1) - poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1) - poses[:, :3, 3] = centers - return poses - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines 
= open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class BlenderPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - # print("root_dir: ", root_dir) - self.root_dir = root_dir - self.split = split - - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json' # folder_id and uid - with open(lvis_json_path, 'r') as f: - lvis_paths = json.load(f) - if self.split == 'train': - self.lvis_paths = lvis_paths['train'] - else: - self.lvis_paths = lvis_paths['val'] - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json" - - with open(pose_json_path, 'r') as f: - meta = json.load(f) - intrinsic = np.eye(4) - intrinsic[:3, :3] = np.array(meta["intrinsics"]) - self.intrinsic = intrinsic - self.near_far = np.array(meta["near_far"]) - self.near_far[1] = 1.8 - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32) - - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - - - def load_cam_info(self): - for vid in range(self.input_poses.shape[0]): - intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - pass - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - - center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - # print("center", center) - # print("radius", radius) - # print("bounds", bounds) - # import ipdb; ipdb.set_trace() - radius = radius * factor - scale_mat = np.diag([radius, radius, 
radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def __len__(self): - return 32*len(self.lvis_paths) - - - def read_depth(self, filename, near_bound, noisy_factor=1.0): - pass - - - def __getitem__(self, idx): - sample = {} - origin_idx = idx - imgs, depths_h, masks_h = [], [], [] # full size (256, 256) - intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views - - - folder_uid_dict = self.lvis_paths[idx//32] - idx = idx % 32 # [0, 7] - folder_id = folder_uid_dict['folder_id'] - uid = folder_uid_dict['uid'] - - pose_file = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, 'views.npz') - pose_array = np.load(pose_file) - pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws - - self.img_wh = (256, 256) - self.input_poses = np.array(pose) - self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1) - self.define_transforms() - self.blender2opencv = np.array( - [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]] - ) - - self.c2ws = [] - self.w2cs = [] - self.near_fars = [] - # self.root_dir = root_dir - for image_dix in range(pose.shape[0]): - pose = self.input_poses[image_dix] - c2w = pose @ self.blender2opencv - self.c2ws.append(c2w) - self.w2cs.append(np.linalg.inv(c2w)) - self.near_fars.append(self.near_far) - self.c2ws = np.stack(self.c2ws, axis=0) - self.w2cs = np.stack(self.w2cs, axis=0) - - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - self.load_cam_info() - - - - # target view - c2w = self.c2ws[idx] - w2c = np.linalg.inv(c2w) - w2c_ref = w2c - w2c_ref_inv = np.linalg.inv(w2c_ref) - - w2cs.append(w2c @ w2c_ref_inv) - c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv)) - - img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}.png') - - depth_filename = os.path.join(os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}_depth_mm.png')) - - - img = Image.open(img_filename) - - img = self.transform(img) # (4, h, w) - - - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - imgs += [img] - - depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0 - mask_h = depth_h > 0 - - directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3] - surface_points = directions * depth_h[..., None] # [H, W, 3] - distance = np.linalg.norm(surface_points, axis=-1) # [H, W] - depth_h = distance - - - depths_h.append(depth_h) - masks_h.append(mask_h) - - intrinsic = self.intrinsic - intrinsics.append(intrinsic) - - - near_fars.append(self.near_fars[idx]) - image_perm = 0 # only supervised on reference view - - mask_dilated = None - - # src_views = range(8+idx*4, 8+(idx+1)*4) - src_views = range(0, 8 * 4) - - for vid in src_views: - img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{vid}.png') - - img = Image.open(img_filename) - img_wh = self.img_wh - - img = self.transform(img) - if img.shape[0] == 4: - img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB - - imgs += [img] - depth_h = np.ones(img.shape[1:], dtype=np.float32) - depths_h.append(depth_h) - masks_h.append(np.ones(img.shape[1:], 
dtype=np.int32)) - - near_fars.append(self.all_near_fars[vid]) - intrinsics.append(self.all_intrinsics[vid]) - - w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv) - - - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat( - img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1 - ) - - - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - # print(new_near_fars) - imgs = torch.stack(imgs).float() - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if self.split == 'train': - start_idx = 0 - else: - start_idx = 1 - - view_ids = [idx] + list(src_views) - sample['origin_idx'] = origin_idx - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - # sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = folder_id - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(img_wh)) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = self.partial_vol_origin - sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0]) - - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
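# calc_pose() defined near the top of this file places cameras on a sphere of
# radius 1.2 around the origin from (elevation, azimuth) pairs and returns their
# camera-to-world poses; the views.npz angles loaded above are passed to it as
# calc_pose(elevations, azimuths, 32). Minimal usage sketch of that function with
# made-up angles (radians, matching the torch.sin/torch.cos inside calc_pose):
import numpy as np
phis = [np.pi / 3, np.pi / 3, np.pi / 2, np.pi / 2]      # hypothetical elevations
thetas = [0.0, 0.5 * np.pi, np.pi, 1.5 * np.pi]          # hypothetical azimuths
poses = calc_pose(phis, thetas, size=4)                  # (4, 3, 4) cam-to-world matrices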
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt b/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt deleted file mode 100644 index bd0d79868f196991c06ec2a496dbe06e5ded0fd2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt +++ /dev/null @@ -1,93 +0,0 @@ -46 -0 -10 10 2346.410000 1 2036.530000 9 1243.890000 12 1052.870000 11 1000.840000 13 703.583000 2 604.456000 8 439.759000 14 327.419000 27 249.278000 -1 -10 9 2850.870000 10 2583.940000 2 2105.590000 0 2052.840000 8 1868.240000 13 1184.230000 14 1017.510000 12 961.966000 7 670.208000 15 657.218000 -2 -10 8 2501.240000 1 2106.880000 7 1856.500000 9 1782.340000 3 1141.770000 15 1061.760000 14 815.457000 16 762.153000 6 709.789000 10 699.921000 -3 -10 7 1294.390000 6 1159.130000 2 1134.270000 4 905.717000 8 687.320000 5 600.015000 17 496.958000 16 481.969000 1 379.011000 15 307.450000 -4 -10 5 1333.740000 6 1145.150000 3 895.254000 7 486.504000 18 446.420000 2 418.517000 17 326.528000 8 161.115000 16 149.154000 1 103.626000 -5 -10 6 1676.060000 18 1555.060000 4 1335.550000 17 868.416000 3 593.755000 7 467.816000 20 440.579000 19 428.255000 16 242.327000 21 210.253000 -6 -10 17 2332.350000 7 1848.240000 18 1812.740000 5 1696.070000 16 1273.000000 3 1157.990000 4 1155.410000 20 771.624000 21 744.945000 2 700.368000 -7 -10 16 2709.460000 8 2439.700000 15 2078.210000 6 1864.160000 2 1846.600000 17 1791.710000 3 1296.860000 22 957.793000 9 879.088000 21 782.277000 -8 -10 15 3124.010000 9 3099.920000 14 2756.290000 2 2501.220000 7 2449.320000 1 1875.940000 16 1726.040000 13 1325.760000 23 1177.090000 24 1108.820000 -9 -10 13 3355.620000 14 3226.070000 8 3098.800000 10 3097.070000 1 2861.420000 12 1873.630000 2 1785.980000 15 1753.320000 25 1365.450000 0 1261.590000 -10 -10 12 3750.700000 9 3085.870000 13 3028.390000 1 2590.550000 0 2369.790000 11 2266.670000 14 1524.160000 26 1448.150000 27 1293.600000 8 1041.840000 -11 -10 12 3543.760000 27 3056.050000 10 2248.070000 26 1524.280000 28 1273.330000 13 1265.900000 29 1129.550000 0 998.164000 9 591.176000 30 572.919000 -12 -10 27 3889.870000 10 3754.540000 13 3745.210000 11 3584.260000 26 3574.560000 25 1877.110000 9 1866.340000 29 1482.720000 30 1418.510000 14 1341.860000 -13 -10 12 3773.140000 26 3699.280000 25 3657.170000 14 3652.040000 9 3356.290000 10 3049.270000 24 2098.910000 27 1900.960000 31 1460.960000 30 1349.620000 -14 -10 13 3663.520000 24 3610.690000 9 3232.550000 25 3216.400000 15 3128.840000 8 2758.040000 23 2219.910000 26 1567.450000 10 1536.600000 32 1419.330000 -15 -10 23 3194.920000 14 3126.000000 8 3120.430000 16 2897.020000 24 2562.490000 7 2084.050000 22 2041.630000 9 1752.080000 33 1232.290000 13 1137.550000 -16 -10 15 2884.140000 
7 2713.880000 22 2708.570000 17 2448.500000 21 2173.300000 23 1908.030000 8 1718.790000 6 1281.960000 35 1047.380000 34 980.064000 -17 -10 21 2632.480000 16 2428.000000 6 2343.570000 18 2250.230000 20 2149.750000 7 1779.420000 22 1380.250000 36 957.046000 5 878.398000 15 789.068000 -18 -9 17 2219.150000 20 2173.020000 6 1802.390000 19 1575.770000 5 1564.810000 21 1160.130000 16 660.317000 7 589.484000 36 559.983000 -19 -7 20 1828.970000 18 1564.630000 17 685.249000 36 613.420000 21 572.770000 5 427.597000 6 368.651000 -20 -8 21 2569.790000 36 2258.330000 18 2186.710000 17 2130.670000 19 1865.060000 35 996.122000 16 799.808000 40 778.721000 -21 -9 36 2704.590000 35 2639.690000 17 2638.190000 20 2605.430000 22 2604.260000 16 2158.250000 34 1239.250000 18 1178.240000 40 1128.570000 -22 -10 23 3232.680000 34 3175.150000 35 2831.090000 16 2712.510000 21 2632.190000 15 2033.390000 33 1712.670000 17 1393.860000 36 1290.960000 24 1195.330000 -23 -10 24 3710.900000 33 3603.070000 22 3244.200000 15 3190.620000 34 3086.490000 14 2220.110000 32 2100.000000 16 1917.100000 35 1359.790000 25 1356.710000 -24 -10 25 3844.600000 32 3750.750000 23 3710.600000 14 3609.090000 33 3091.040000 15 2559.240000 31 2423.710000 13 2109.360000 26 1440.580000 34 1410.030000 -25 -10 26 3951.740000 31 3888.570000 24 3833.070000 13 3667.350000 14 3208.210000 32 2993.460000 30 2681.520000 12 1900.230000 45 1484.030000 27 1462.880000 -26 -10 30 4033.350000 27 3970.470000 25 3925.250000 13 3686.340000 12 3595.590000 29 2943.870000 31 2917.000000 14 1556.340000 11 1554.750000 46 1503.840000 -27 -10 29 4027.840000 26 3929.940000 12 3875.580000 11 3085.030000 28 2908.600000 30 2792.670000 13 1878.420000 25 1438.550000 47 1425.200000 10 1290.250000 -28 -10 29 3687.020000 48 3209.130000 27 2872.860000 47 2014.530000 30 1361.950000 11 1273.600000 26 1062.850000 12 840.841000 46 672.985000 31 271.952000 -29 -10 27 4029.430000 30 3909.550000 28 3739.930000 47 3695.230000 48 3135.870000 26 2910.970000 46 2229.550000 12 1479.160000 31 1430.260000 11 1144.560000 -30 -10 26 4029.860000 29 3953.720000 31 3811.120000 46 3630.460000 47 3105.960000 27 2824.430000 25 2657.890000 45 2347.750000 32 1459.110000 12 1429.620000 -31 -10 25 3882.210000 30 3841.880000 32 3808.500000 45 3649.820000 46 3000.670000 26 2939.940000 24 2409.930000 44 2381.300000 13 1467.590000 29 1459.560000 -32 -10 31 3826.500000 24 3744.140000 33 3613.240000 44 3552.040000 25 3004.600000 45 2884.590000 43 2393.340000 23 2095.270000 30 1478.600000 14 1420.780000 -33 -10 32 3618.110000 23 3598.100000 34 3530.530000 43 3462.370000 24 3091.530000 44 2608.080000 42 2426.000000 22 1717.940000 31 1407.650000 25 1324.780000 -34 -10 33 3523.370000 42 3356.550000 35 3210.340000 22 3178.850000 23 3079.030000 43 2396.450000 41 2386.860000 24 1408.020000 32 1301.340000 21 1256.450000 -35 -10 34 3187.880000 41 3106.440000 36 2866.040000 22 2817.740000 21 2654.870000 40 2416.980000 42 2137.810000 23 1346.860000 33 1150.330000 16 1044.660000 -36 -8 40 2910.700000 35 2832.660000 21 2689.960000 20 2280.460000 41 1787.970000 22 1268.490000 34 981.636000 17 954.229000 -40 -7 36 2918.140000 41 2852.620000 35 2392.960000 21 1124.300000 42 1056.480000 34 877.946000 20 788.701000 -41 -9 35 3111.050000 42 3049.710000 40 2885.360000 34 2371.020000 36 1813.690000 43 1164.710000 22 1126.900000 21 906.536000 33 903.238000 -42 -10 34 3356.980000 43 3183.000000 41 3070.540000 33 2421.770000 35 2155.080000 44 1278.410000 23 1183.520000 22 1147.070000 40 1077.080000 32 899.646000 -43 -10 33 3461.240000 
44 3380.740000 42 3188.700000 34 2400.600000 32 2399.090000 45 1359.370000 23 1314.080000 41 1176.120000 24 1159.620000 31 901.556000 -44 -10 32 3550.810000 45 3510.160000 43 3373.110000 33 2602.330000 31 2395.930000 24 1410.430000 46 1386.310000 42 1279.000000 25 1095.240000 34 968.440000 -45 -10 31 3650.090000 46 3555.090000 44 3491.150000 32 2868.390000 30 2373.590000 25 1485.370000 47 1405.280000 43 1349.540000 33 1104.770000 26 1046.810000 -46 -10 30 3635.640000 47 3562.170000 45 3524.170000 31 2976.820000 29 2264.040000 26 1508.870000 44 1367.410000 48 1352.100000 32 1211.240000 25 1102.170000 -47 -10 29 3705.310000 46 3519.760000 48 3450.480000 30 3074.770000 28 2054.630000 27 1434.570000 45 1377.340000 31 1268.230000 26 1223.830000 25 471.111000 -48 -10 47 3401.950000 28 3224.840000 29 3101.160000 46 1317.100000 30 1306.700000 27 1235.070000 26 537.731000 31 291.919000 45 276.869000 11 258.856000 diff --git a/SparseNeuS_demo_v1/data/dtu/lists/test.txt b/SparseNeuS_demo_v1/data/dtu/lists/test.txt deleted file mode 100644 index b1420254bbe0fe15e9ad9358cdbaedf34605a558..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/lists/test.txt +++ /dev/null @@ -1,15 +0,0 @@ -scan24 -scan37 -scan40 -scan55 -scan63 -scan65 -scan69 -scan83 -scan97 -scan105 -scan106 -scan110 -scan114 -scan118 -scan122 \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/dtu/lists/train.txt b/SparseNeuS_demo_v1/data/dtu/lists/train.txt deleted file mode 100644 index 4259e846edcee621baf19875e2900e169849f5e3..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu/lists/train.txt +++ /dev/null @@ -1,75 +0,0 @@ -scan1 -scan4 -scan5 -scan6 -scan8 -scan9 -scan10 -scan11 -scan12 -scan13 -scan14 -scan15 -scan16 -scan17 -scan18 -scan19 -scan20 -scan21 -scan22 -scan23 -scan28 -scan29 -scan30 -scan31 -scan32 -scan33 -scan34 -scan35 -scan36 -scan38 -scan39 -scan41 -scan42 -scan43 -scan44 -scan45 -scan46 -scan47 -scan48 -scan49 -scan50 -scan51 -scan52 -scan59 -scan60 -scan61 -scan62 -scan64 -scan74 -scan75 -scan76 -scan77 -scan84 -scan85 -scan86 -scan87 -scan88 -scan89 -scan90 -scan91 -scan92 -scan93 -scan94 -scan95 -scan96 -scan98 -scan99 -scan100 -scan101 -scan102 -scan103 -scan104 -scan126 -scan127 -scan128 \ No newline at end of file diff --git a/SparseNeuS_demo_v1/data/dtu_fit.py b/SparseNeuS_demo_v1/data/dtu_fit.py deleted file mode 100644 index e4a97d28b635a9158c49e2a651c7799ad1009021..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu_fit.py +++ /dev/null @@ -1,278 +0,0 @@ -import torch -import torch.nn as nn -import cv2 as cv -import numpy as np -import re -import os -import logging -from glob import glob - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image - -from data.scene import get_boundingbox - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! 
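# The dtu_pairs.txt block deleted above stores, for each DTU reference view, its
# highest-scoring source views: the first line is the number of reference views;
# each entry is then one line with the view id and one line of the form
# "N src_0 score_0 src_1 score_1 ...". A standalone parser sketch (dtu_general.py
# further below keeps only the ids via the same [1::2] slice):
def parse_pair_file(path):
    pairs = {}
    with open(path) as f:
        num_viewpoint = int(f.readline())
        for _ in range(num_viewpoint):
            ref_view = int(f.readline().rstrip())
            tokens = f.readline().rstrip().split()
            src_views = [int(x) for x in tokens[1::2]]     # ids sit at the odd positions
            scores = [float(x) for x in tokens[2::2]]      # scores follow each id
            pairs[ref_view] = list(zip(src_views, scores))
    return pairs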
return cam2world matrix here - - -class DtuFit: - def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=[], test_img_idx=[], - img_wh=[800, 600], clip_wh=[0, 0], original_img_wh=[1600, 1200], - N_rays=512, h_patch_size=5, near=425, far=900): - super(DtuFit, self).__init__() - logging.info('Load data: Begin') - - self.root_dir = root_dir - self.split = split - self.scan_id = scan_id - self.n_views = n_views - - self.near = near - self.far = far - - if self.scan_id is not None: - self.data_dir = os.path.join(self.root_dir, self.scan_id) - else: - self.data_dir = self.root_dir - - self.img_wh = img_wh - self.clip_wh = clip_wh - - if len(self.clip_wh) == 2: - self.clip_wh = self.clip_wh + self.clip_wh - - self.original_img_wh = original_img_wh - self.N_rays = N_rays - self.h_patch_size = h_patch_size # used to extract patch for supervision - self.train_img_idx = train_img_idx - self.test_img_idx = test_img_idx - - camera_dict = np.load(os.path.join(self.data_dir, 'cameras.npz'), allow_pickle=True) - self.images_list = sorted(glob(os.path.join(self.data_dir, "image/*.png"))) - # world_mat: projection matrix: world to image - self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in - range(len(self.images_list))] - - self.raw_near_fars = np.stack([np.array([self.near, self.far]) for i in range(len(self.images_list))]) - - # - reference image; transform the world system to the ref-camera system - self.ref_img_idx = self.train_img_idx[0] - ref_world_mat = self.world_mats_np[self.ref_img_idx] - self.ref_w2c = np.linalg.inv(load_K_Rt_from_P(None, ref_world_mat[:3, :4])[1]) - - self.all_images = [] - self.all_intrinsics = [] - self.all_w2cs = [] - - self.load_scene() # load the scene - - # ! estimate scale_mat - self.scale_mat, self.scale_factor = self.cal_scale_mat( - img_hw=[self.img_wh[1], self.img_wh[0]], - intrinsics=self.all_intrinsics[self.train_img_idx], - extrinsics=self.all_w2cs[self.train_img_idx], - near_fars=self.raw_near_fars[self.train_img_idx], - factor=1.1) - - # * after scaling and translation, unit bounding box - self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \ - self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info() - # import ipdb; ipdb.set_trace() - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - - logging.info('Load data: End') - - def load_scene(self): - - scale_x = self.img_wh[0] / self.original_img_wh[0] - scale_y = self.img_wh[1] / self.original_img_wh[1] - - for idx in range(len(self.images_list)): - image = cv.imread(self.images_list[idx]) - image = cv.resize(image, (self.img_wh[0], self.img_wh[1])) / 255. 
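# DtuFit re-anchors its extrinsics to the reference view (ref_w2c is set up above,
# and load_scene just below appends w2c @ inv(ref_w2c)), the same convention the
# loaders earlier in this diff use: the reference camera's frame plays the role of
# the world frame, so its own re-anchored w2c is the identity. Minimal sketch with
# two made-up cameras:
import numpy as np
ref_w2c = np.eye(4)
ref_w2c[:3, 3] = [0.0, 0.0, 2.0]                 # hypothetical reference extrinsic
src_w2c = np.eye(4)
src_w2c[:3, 3] = [0.5, 0.0, 2.0]                 # hypothetical source extrinsic
new_ref = ref_w2c @ np.linalg.inv(ref_w2c)       # identity: the ref view defines the frame
new_src = src_w2c @ np.linalg.inv(ref_w2c)       # source pose expressed relative to the ref camera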
- - image = image[self.clip_wh[1]:self.img_wh[1] - self.clip_wh[3], - self.clip_wh[0]:self.img_wh[0] - self.clip_wh[2]] - self.all_images.append(np.transpose(image[:, :, ::-1], (2, 0, 1))) # append [3,] - - P = self.world_mats_np[idx] - P = P[:3, :4] - intrinsics, c2w = load_K_Rt_from_P(None, P) - w2c = np.linalg.inv(c2w) - - intrinsics[:1] *= scale_x - intrinsics[1:2] *= scale_y - - intrinsics[0, 2] -= self.clip_wh[0] - intrinsics[1, 2] -= self.clip_wh[1] - - self.all_intrinsics.append(intrinsics) - # - transform from world system to ref-camera system - self.all_w2cs.append(w2c @ np.linalg.inv(self.ref_w2c)) - - - self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32) - self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32) - self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32) - self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2], - self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]] - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. / radius.cpu().numpy() - - def scale_cam_info(self): - new_intrinsics = [] - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - for idx in range(len(self.all_images)): - intrinsics = self.all_intrinsics[idx] - P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat - P = P.cpu().numpy()[:3, :4] - - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - new_intrinsics.append(intrinsics) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - - new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \ - np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \ - np.stack(new_affine_mats), np.stack(new_near_fars) - - new_intrinsics = torch.from_numpy(np.float32(new_intrinsics)) - new_w2cs = torch.from_numpy(np.float32(new_w2cs)) - new_c2ws = torch.from_numpy(np.float32(new_c2ws)) - new_affine_mats = torch.from_numpy(np.float32(new_affine_mats)) - new_near_fars = torch.from_numpy(np.float32(new_near_fars)) - - return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars - - - def get_conditional_sample(self): - sample = {} - support_idxs = self.train_img_idx - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[self.train_img_idx] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[self.train_img_idx] # (V, 4, 4) - sample['near_fars'] = self.scaled_near_fars[self.train_img_idx] # (V, 2) - sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx] # ! 
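# When load_scene() above resizes the images from original_img_wh to img_wh and
# then crops clip_wh pixels, the intrinsics are updated to match: the first two
# rows scale with the resize and the principal point shifts by the crop offset.
# Minimal sketch with made-up numbers:
import numpy as np
K = np.array([[2890.0, 0.0, 820.0],
              [0.0, 2880.0, 610.0],
              [0.0, 0.0, 1.0]])                  # hypothetical full-resolution intrinsics (1600x1200)
scale_x, scale_y = 800 / 1600, 600 / 1200        # resize to 800x600
clip_w, clip_h = 80, 44                          # hypothetical crop offsets
K_new = K.copy()
K_new[0] *= scale_x                              # fx, skew, cx scale with the width
K_new[1] *= scale_y                              # fy, cy scale with the height
K_new[0, 2] -= clip_w                            # principal point shifts by the crop
K_new[1, 2] -= clip_h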
in world space - - sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - - return sample - - def __len__(self): - if self.split == 'train': - return self.n_views * 1000 - else: - return len(self.test_img_idx) * 1000 - - def __getitem__(self, idx): - sample = {} - - if self.split == 'train': - render_idx = self.train_img_idx[idx % self.n_views] - support_idxs = [idx for idx in self.train_img_idx if idx != render_idx] - else: - # render_idx = idx % self.n_test_images + self.n_train_images - render_idx = self.test_img_idx[idx % len(self.test_img_idx)] - support_idxs = [render_idx] - - sample['images'] = self.all_images[support_idxs] # (V, 3, H, W) - sample['w2cs'] = self.scaled_w2cs[support_idxs] # (V, 4, 4) - sample['c2ws'] = self.scaled_c2ws[support_idxs] # (V, 4, 4) - sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3] # (V, 3, 3) - sample['affine_mats'] = self.scaled_affine_mats[support_idxs] # ! in world space - sample['scan'] = self.scan_id - sample['scale_factor'] = torch.tensor(self.scale_factor) - sample['img_wh'] = torch.from_numpy(np.array(self.img_wh)) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['img_index'] = torch.tensor(render_idx) - - # - query image - sample['query_image'] = self.all_images[render_idx] - sample['query_c2w'] = self.scaled_c2ws[render_idx] - sample['query_w2c'] = self.scaled_w2cs[render_idx] - sample['query_intrinsic'] = self.scaled_intrinsics[render_idx] - sample['query_near_far'] = self.scaled_near_fars[render_idx] - sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx]) - sample['scale_mat'] = torch.from_numpy(self.scale_mat) - sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c)) - sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx] - sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32)) - - # - generate rays - if self.split == 'val' or self.split == 'test': - sample_rays = gen_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None) - else: - sample_rays = gen_random_rays_from_single_image( - self.img_wh[1], self.img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=None, - mask=None, - dilated_mask=None, - importance_sample=False, - h_patch_size=self.h_patch_size - ) - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/dtu_general.py b/SparseNeuS_demo_v1/data/dtu_general.py deleted file mode 100644 index c6c7734df6072dd618ccdde71ca428f983a605e8..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/data/dtu_general.py +++ /dev/null @@ -1,376 +0,0 @@ -from torch.utils.data import Dataset -from utils.misc_utils import read_pfm -import os -import numpy as np -import cv2 -from PIL import Image -import torch -from torchvision import transforms as T -from data.scene import get_boundingbox - -from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image - -from termcolor import colored -import pdb -import random - - -def load_K_Rt_from_P(filename, 
P=None): - if P is None: - lines = open(filename).read().splitlines() - if len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv2.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -# ! load one ref-image with multiple src-images in camera coordinate system -class MVSDatasetDtuPerView(Dataset): - def __init__(self, root_dir, split, n_views=3, img_wh=(640, 512), downSample=1.0, - split_filepath=None, pair_filepath=None, - N_rays=512, - vol_dims=[128, 128, 128], batch_size=1, - clean_image=False, importance_sample=False, test_ref_views=[]): - - self.root_dir = root_dir - self.split = split - - self.img_wh = img_wh - self.downSample = downSample - self.num_all_imgs = 49 # this preprocessed DTU dataset has 49 images - self.n_views = n_views - self.N_rays = N_rays - self.batch_size = batch_size # - used for construct new metas for gru fusion training - - self.clean_image = clean_image - self.importance_sample = importance_sample - self.test_ref_views = test_ref_views # used for testing - self.scale_factor = 1.0 - self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0])) - - if img_wh is not None: - assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \ - 'img_wh must both be multiples of 32!' - - self.split_filepath = f'data/dtu/lists/{self.split}.txt' if split_filepath is None else split_filepath - self.pair_filepath = f'data/dtu/dtu_pairs.txt' if pair_filepath is None else pair_filepath - - print(colored("loading all scenes together", 'red')) - with open(self.split_filepath) as f: - self.scans = [line.rstrip() for line in f.readlines()] - - self.all_intrinsics = [] # the cam info of the whole scene - self.all_extrinsics = [] - self.all_near_fars = [] - - self.metas, self.ref_src_pairs = self.build_metas() # load ref-srcs view pairs info of the scene - - self.allview_ids = [i for i in range(self.num_all_imgs)] - - self.load_cam_info() # load camera info of DTU, and estimate scale_mat - - self.build_remap() - self.define_transforms() - print(f'==> image down scale: {self.downSample}') - - # * bounding box for rendering - self.bbox_min = np.array([-1.0, -1.0, -1.0]) - self.bbox_max = np.array([1.0, 1.0, 1.0]) - - # - used for cost volume regularization - self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32) - self.partial_vol_origin = torch.Tensor([-1., -1., -1.]) - - def build_remap(self): - self.remap = np.zeros(np.max(self.allview_ids) + 1).astype('int') - for i, item in enumerate(self.allview_ids): - self.remap[item] = i - - def define_transforms(self): - self.transform = T.Compose([T.ToTensor()]) - - def build_metas(self): - metas = [] - ref_src_pairs = {} - # light conditions 0-6 for training - # light condition 3 for testing (the brightest?) 
- light_idxs = [3] if 'train' not in self.split else range(7) - - with open(self.pair_filepath) as f: - num_viewpoint = int(f.readline()) - # viewpoints (49) - for _ in range(num_viewpoint): - ref_view = int(f.readline().rstrip()) - src_views = [int(x) for x in f.readline().rstrip().split()[1::2]] - - ref_src_pairs[ref_view] = src_views - - for light_idx in light_idxs: - for scan in self.scans: - with open(self.pair_filepath) as f: - num_viewpoint = int(f.readline()) - # viewpoints (49) - for _ in range(num_viewpoint): - ref_view = int(f.readline().rstrip()) - src_views = [int(x) for x in f.readline().rstrip().split()[1::2]] - - # ! only for validation - if len(self.test_ref_views) > 0 and ref_view not in self.test_ref_views: - continue - - metas += [(scan, light_idx, ref_view, src_views)] - - return metas, ref_src_pairs - - def read_cam_file(self, filename): - with open(filename) as f: - lines = [line.rstrip() for line in f.readlines()] - # extrinsics: line [1,5), 4x4 matrix - extrinsics = np.fromstring(' '.join(lines[1:5]), dtype=np.float32, sep=' ') - extrinsics = extrinsics.reshape((4, 4)) - # intrinsics: line [7-10), 3x3 matrix - intrinsics = np.fromstring(' '.join(lines[7:10]), dtype=np.float32, sep=' ') - intrinsics = intrinsics.reshape((3, 3)) - # depth_min & depth_interval: line 11 - depth_min = float(lines[11].split()[0]) - depth_max = depth_min + float(lines[11].split()[1]) * 192 - self.depth_interval = float(lines[11].split()[1]) - intrinsics_ = np.float32(np.diag([1, 1, 1, 1])) - intrinsics_[:3, :3] = intrinsics - return intrinsics_, extrinsics, [depth_min, depth_max] - - def load_cam_info(self): - for vid in range(self.num_all_imgs): - proj_mat_filename = os.path.join(self.root_dir, - f'Cameras/train/{vid:08d}_cam.txt') - intrinsic, extrinsic, near_far = self.read_cam_file(proj_mat_filename) - intrinsic[:2] *= 4 # * the provided intrinsics is 4x downsampled, now keep the same scale with image - self.all_intrinsics.append(intrinsic) - self.all_extrinsics.append(extrinsic) - self.all_near_fars.append(near_far) - - def read_depth(self, filename): - # import ipdb; ipdb.set_trace() - depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (1200, 1600) - depth_h = np.ones((1200, 1600)) - # print(depth_h.shape) - depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5, - interpolation=cv2.INTER_NEAREST) # (600, 800) - depth_h = depth_h[44:556, 80:720] # (512, 640) - # print(depth_h.shape) - # import ipdb; ipdb.set_trace() - depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, - interpolation=cv2.INTER_NEAREST) - - return depth, depth_h - - def read_mask(self, filename): - mask_h = cv2.imread(filename, 0) - mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample, - interpolation=cv2.INTER_NEAREST) - mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, - interpolation=cv2.INTER_NEAREST) - - mask[mask > 0] = 1 # the masks stored in png are not binary - mask_h[mask_h > 0] = 1 - - return mask, mask_h - - def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.): - center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars) - radius = radius * factor - scale_mat = np.diag([radius, radius, radius, 1.0]) - scale_mat[:3, 3] = center.cpu().numpy() - scale_mat = scale_mat.astype(np.float32) - - return scale_mat, 1. 
/ radius.cpu().numpy() - - def __len__(self): - return len(self.metas) - - def __getitem__(self, idx): - sample = {} - scan, light_idx, ref_view, src_views = self.metas[idx % len(self.metas)] - - # generalized, load some images at once - view_ids = [ref_view] + src_views[:self.n_views] - # * transform from world system to camera system - w2c_ref = self.all_extrinsics[self.remap[ref_view]] - w2c_ref_inv = np.linalg.inv(w2c_ref) - - image_perm = 0 # only supervised on reference view - - imgs, depths_h, masks_h = [], [], [] # full size (640, 512) - intrinsics, w2cs, near_fars = [], [], [] # record proj mats between views - mask_dilated = None - for i, vid in enumerate(view_ids): - # NOTE that the id in image file names is from 1 to 49 (not 0~48) - img_filename = os.path.join(self.root_dir, - f'Rectified/{scan}_train/rect_{vid + 1:03d}_{light_idx}_r5000.png') - depth_filename = os.path.join(self.root_dir, - f'Depths/{scan}_train/depth_map_{vid:04d}.pfm') - # print(depth_filename) - mask_filename = os.path.join(self.root_dir, - f'Masks_clean_dilated/{scan}_train/mask_{vid:04d}.png') - - img = Image.open(img_filename) - img_wh = np.round(np.array(img.size) * self.downSample).astype('int') - img = img.resize(img_wh, Image.BILINEAR) - - if os.path.exists(mask_filename) and self.clean_image: - mask_l, mask_h = self.read_mask(mask_filename) - else: - # print(self.split, "don't find mask file", mask_filename) - mask_h = np.ones([img_wh[1], img_wh[0]]) - masks_h.append(mask_h) - - if i == 0: - kernel_size = 101 # default 101 - kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128) - - if self.clean_image: - img = np.array(img) - img[mask_h < 0.5] = 0.0 - - img = self.transform(img) - - imgs += [img] - - index_mat = self.remap[vid] - near_fars.append(self.all_near_fars[index_mat]) - intrinsics.append(self.all_intrinsics[index_mat]) - - w2cs.append(self.all_extrinsics[index_mat] @ w2c_ref_inv) - - # print(depth_filename) - if os.path.exists(depth_filename): # and i == 0 - # print("file exists") - depth_l, depth_h = self.read_depth(depth_filename) - depths_h.append(depth_h) - # ! estimate scale_mat - scale_mat, scale_factor = self.cal_scale_mat(img_hw=[img_wh[1], img_wh[0]], - intrinsics=intrinsics, extrinsics=w2cs, - near_fars=near_fars, factor=1.1) - - # ! 
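# The kernel/dilate step in __getitem__ above grows the reference-view mask with a
# large elliptical structuring element, presumably so rays just outside the object
# silhouette can still be sampled after background pixels are zeroed out. Minimal
# sketch on a synthetic mask (same default kernel size of 101):
import cv2
import numpy as np
mask_h = np.zeros((512, 640), dtype=np.float32)
mask_h[200:300, 250:350] = 1.0                   # hypothetical foreground blob
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (101, 101))
mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128)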
calculate the new w2cs after scaling - new_near_fars = [] - new_w2cs = [] - new_c2ws = [] - new_affine_mats = [] - new_depths_h = [] - for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h): - P = intrinsic @ extrinsic @ scale_mat - P = P[:3, :4] - # - should use load_K_Rt_from_P() to obtain c2w - c2w = load_K_Rt_from_P(None, P)[1] - w2c = np.linalg.inv(c2w) - new_w2cs.append(w2c) - new_c2ws.append(c2w) - affine_mat = np.eye(4) - affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4] - new_affine_mats.append(affine_mat) - - camera_o = c2w[:3, 3] - dist = np.sqrt(np.sum(camera_o ** 2)) - near = dist - 1 - far = dist + 1 - - new_near_fars.append([0.95 * near, 1.05 * far]) - new_depths_h.append(depth * scale_factor) - - imgs = torch.stack(imgs).float() - print(new_near_fars) - depths_h = np.stack(new_depths_h) - masks_h = np.stack(masks_h) - - affine_mats = np.stack(new_affine_mats) - intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack( - new_near_fars) - - if 'train' in self.split: - start_idx = 0 - else: - start_idx = 1 - - sample['images'] = imgs # (V, 3, H, W) - sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W) - sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W) - sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4) - sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4) - sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2) - sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3) - sample['view_ids'] = torch.from_numpy(np.array(view_ids)) - sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space - - sample['light_idx'] = torch.tensor(light_idx) - sample['scan'] = scan - - sample['scale_factor'] = torch.tensor(scale_factor) - sample['img_wh'] = torch.from_numpy(img_wh) - sample['render_img_idx'] = torch.tensor(image_perm) - sample['partial_vol_origin'] = torch.tensor(self.partial_vol_origin, dtype=torch.float32) - sample['meta'] = str(scan) + "_light" + str(light_idx) + "_refview" + str(ref_view) - - # - image to render - sample['query_image'] = sample['images'][0] - sample['query_c2w'] = sample['c2ws'][0] - sample['query_w2c'] = sample['w2cs'][0] - sample['query_intrinsic'] = sample['intrinsics'][0] - sample['query_depth'] = sample['depths_h'][0] - sample['query_mask'] = sample['masks_h'][0] - sample['query_near_far'] = sample['near_fars'][0] - - sample['images'] = sample['images'][start_idx:] # (V, 3, H, W) - sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W) - sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W) - sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4) - sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4) - sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3) - sample['view_ids'] = sample['view_ids'][start_idx:] - sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! 
in world space - - sample['scale_mat'] = torch.from_numpy(scale_mat) - sample['trans_mat'] = torch.from_numpy(w2c_ref_inv) - - # - generate rays - if ('val' in self.split) or ('test' in self.split): - sample_rays = gen_rays_from_single_image( - img_wh[1], img_wh[0], - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None) - else: - sample_rays = gen_random_rays_from_single_image( - img_wh[1], img_wh[0], - self.N_rays, - sample['query_image'], - sample['query_intrinsic'], - sample['query_c2w'], - depth=sample['query_depth'], - mask=sample['query_mask'] if self.clean_image else None, - dilated_mask=mask_dilated, - importance_sample=self.importance_sample) - - sample['rays'] = sample_rays - - return sample diff --git a/SparseNeuS_demo_v1/data/scene.py b/SparseNeuS_demo_v1/data/scene.py index 49183c65418338864ecabdd1af914bbb0f055579..5f34f4abf9977fba8a3f8785ef4f0c95dbd9fa1b 100644 --- a/SparseNeuS_demo_v1/data/scene.py +++ b/SparseNeuS_demo_v1/data/scene.py @@ -1,6 +1,5 @@ import numpy as np import torch -import pdb def rigid_transform(xyz, transform): diff --git a/SparseNeuS_demo_v1/evaluation/__init__.py b/SparseNeuS_demo_v1/evaluation/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/SparseNeuS_demo_v1/evaluation/clean_mesh.py b/SparseNeuS_demo_v1/evaluation/clean_mesh.py deleted file mode 100644 index ab65cc72d3be615b71ec852a7adea933355aa250..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/evaluation/clean_mesh.py +++ /dev/null @@ -1,283 +0,0 @@ -import numpy as np -import cv2 as cv -import os -from glob import glob -from scipy.io import loadmat -import trimesh -import open3d as o3d -import torch -from tqdm import tqdm - -import sys - -sys.path.append("../") - - -def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=None): - """ - generate rays in world space, for image image - :param H: - :param W: - :param intrinsics: [3,3] - :param c2ws: [4,4] - :return: - """ - device = image.device - ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' - p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 - - # normalized ndc uv coordinates, (-1, 1) - ndc_u = 2 * xs / (W - 1) - 1 - ndc_v = 2 * ys / (H - 1) - 1 - rays_ndc_uv = torch.stack([ndc_u, ndc_v], dim=-1).view(-1, 2).float().to(device) - - intrinsic_inv = torch.inverse(intrinsic) - - p = p.view(-1, 3).float().to(device) # N_rays, 3 - p = torch.matmul(intrinsic_inv[None, :3, :3], p[:, :, None]).squeeze() # N_rays, 3 - rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # N_rays, 3 - rays_v = torch.matmul(c2w[None, :3, :3], rays_v[:, :, None]).squeeze() # N_rays, 3 - rays_o = c2w[None, :3, 3].expand(rays_v.shape) # N_rays, 3 - - image = image.permute(1, 2, 0) - color = image.view(-1, 3) - depth = depth.view(-1, 1) if depth is not None else None - mask = mask.view(-1, 1) if mask is not None else torch.ones([H * W, 1]).to(device) - sample = { - 'rays_o': rays_o, - 'rays_v': rays_v, - 'rays_ndc_uv': rays_ndc_uv, - 'rays_color': color, - # 'rays_depth': depth, - 'rays_mask': mask, - 'rays_norm_XYZ_cam': p # - XYZ_cam, before multiply depth - } - if depth is not None: - sample['rays_depth'] = depth - - return sample - - -def load_K_Rt_from_P(filename, P=None): - if P is None: - lines = open(filename).read().splitlines() - if 
len(lines) == 4: - lines = lines[1:] - lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] - P = np.asarray(lines).astype(np.float32).squeeze() - - out = cv.decomposeProjectionMatrix(P) - K = out[0] - R = out[1] - t = out[2] - - K = K / K[2, 2] - intrinsics = np.eye(4) - intrinsics[:3, :3] = K - - pose = np.eye(4, dtype=np.float32) - pose[:3, :3] = R.transpose() # ? why need transpose here - pose[:3, 3] = (t[:3] / t[3])[:, 0] - - return intrinsics, pose # ! return cam2world matrix here - - -def clean_points_by_mask(points, scan, imgs_idx=None, minimal_vis=0, mask_dilated_size=11): - cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan)) - mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan))) - n_images = 49 if scan < 83 else 64 - inside_mask = np.zeros(len(points)) - - if imgs_idx is None: - imgs_idx = [i for i in range(n_images)] - - # imgs_idx = [i for i in range(n_images)] - for i in imgs_idx: - P = cameras['world_mat_{}'.format(i)] - pts_image = np.matmul(P[None, :3, :3], points[:, :, None]).squeeze() + P[None, :3, 3] - pts_image = pts_image / pts_image[:, 2:] - pts_image = np.round(pts_image).astype(np.int32) + 1 - - mask_image = cv.imread(mask_lis[i]) - kernel_size = mask_dilated_size # default 101 - kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_image = cv.dilate(mask_image, kernel, iterations=1) - mask_image = (mask_image[:, :, 0] > 128) - - mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0) - mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1) - - in_mask = (pts_image[:, 0] >= 0) * (pts_image[:, 0] <= 1600) * (pts_image[:, 1] >= 0) * ( - pts_image[:, 1] <= 1200) > 0 - curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))] - - curr_mask = curr_mask.astype(np.float32) * in_mask - - inside_mask += curr_mask - - return inside_mask > minimal_vis - - -def clean_mesh_faces_by_mask(mesh_file, new_mesh_file, scan, imgs_idx, minimal_vis=0, mask_dilated_size=11): - old_mesh = trimesh.load(mesh_file) - old_vertices = old_mesh.vertices[:] - old_faces = old_mesh.faces[:] - mask = clean_points_by_mask(old_vertices, scan, imgs_idx, minimal_vis, mask_dilated_size) - indexes = np.ones(len(old_vertices)) * -1 - indexes = indexes.astype(np.long) - indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) - - faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]] - new_faces = old_faces[np.where(faces_mask)] - new_faces[:, 0] = indexes[new_faces[:, 0]] - new_faces[:, 1] = indexes[new_faces[:, 1]] - new_faces[:, 2] = indexes[new_faces[:, 2]] - new_vertices = old_vertices[np.where(mask)] - - new_mesh = trimesh.Trimesh(new_vertices, new_faces) - - new_mesh.export(new_mesh_file) - - -def clean_mesh_by_faces_num(mesh, faces_num=500): - old_vertices = mesh.vertices[:] - old_faces = mesh.faces[:] - - cc = trimesh.graph.connected_components(mesh.face_adjacency, min_len=faces_num) - mask = np.zeros(len(mesh.faces), dtype=np.bool) - mask[np.concatenate(cc)] = True - - indexes = np.ones(len(old_vertices)) * -1 - indexes = indexes.astype(np.long) - indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) - - faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]] - new_faces = old_faces[np.where(faces_mask)] - new_faces[:, 0] = indexes[new_faces[:, 0]] - new_faces[:, 1] = indexes[new_faces[:, 1]] - new_faces[:, 2] = indexes[new_faces[:, 2]] - new_vertices = 
old_vertices[np.where(mask)] - - new_mesh = trimesh.Trimesh(new_vertices, new_faces) - - return new_mesh - - -def clean_mesh_faces_outside_frustum(old_mesh_file, new_mesh_file, imgs_idx, H=1200, W=1600, mask_dilated_size=11, - isolated_face_num=500, keep_largest=True): - '''Remove faces of mesh which cannot be orserved by all cameras - ''' - # if path_mask_npz: - # path_save_clean = IOUtils.add_file_name_suffix(path_save_clean, '_mask') - - cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan)) - mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan))) - - mesh = trimesh.load(old_mesh_file) - intersector = trimesh.ray.ray_pyembree.RayMeshIntersector(mesh) - - all_indices = [] - chunk_size = 5120 - for i in imgs_idx: - mask_image = cv.imread(mask_lis[i]) - kernel_size = mask_dilated_size # default 101 - kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size)) - mask_image = cv.dilate(mask_image, kernel, iterations=1) - - P = cameras['world_mat_{}'.format(i)] - - intrinsic, pose = load_K_Rt_from_P(None, P[:3, :]) - - rays = gen_rays_from_single_image(H, W, torch.from_numpy(mask_image).permute(2, 0, 1).float(), - torch.from_numpy(intrinsic)[:3, :3].float(), - torch.from_numpy(pose).float()) - rays_o = rays['rays_o'] - rays_d = rays['rays_v'] - rays_mask = rays['rays_color'] - - rays_o = rays_o.split(chunk_size) - rays_d = rays_d.split(chunk_size) - rays_mask = rays_mask.split(chunk_size) - - for rays_o_batch, rays_d_batch, rays_mask_batch in tqdm(zip(rays_o, rays_d, rays_mask)): - rays_mask_batch = rays_mask_batch[:, 0] > 128 - rays_o_batch = rays_o_batch[rays_mask_batch] - rays_d_batch = rays_d_batch[rays_mask_batch] - - idx_faces_hits = intersector.intersects_first(rays_o_batch.cpu().numpy(), rays_d_batch.cpu().numpy()) - all_indices.append(idx_faces_hits) - - values = np.unique(np.concatenate(all_indices, axis=0)) - mask_faces = np.ones(len(mesh.faces)) - mask_faces[values[1:]] = 0 - print(f'Surfaces/Kept: {len(mesh.faces)}/{len(values)}') - - mesh_o3d = o3d.io.read_triangle_mesh(old_mesh_file) - print("removing triangles by mask") - mesh_o3d.remove_triangles_by_mask(mask_faces) - - o3d.io.write_triangle_mesh(new_mesh_file, mesh_o3d) - - # # clean meshes - new_mesh = trimesh.load(new_mesh_file) - cc = trimesh.graph.connected_components(new_mesh.face_adjacency, min_len=500) - mask = np.zeros(len(new_mesh.faces), dtype=np.bool) - mask[np.concatenate(cc)] = True - new_mesh.update_faces(mask) - new_mesh.remove_unreferenced_vertices() - new_mesh.export(new_mesh_file) - - # meshes = new_mesh.split(only_watertight=False) - # - # if not keep_largest: - # meshes = [mesh for mesh in meshes if len(mesh.faces) > isolated_face_num] - # # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - # merged_mesh = trimesh.util.concatenate(meshes) - # merged_mesh.export(new_mesh_file) - # else: - # new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - # new_mesh.export(new_mesh_file) - - o3d.io.write_triangle_mesh(new_mesh_file.replace(".ply", "_raw.ply"), mesh_o3d) - print("finishing removing triangles") - - -def clean_outliers(old_mesh_file, new_mesh_file): - new_mesh = trimesh.load(old_mesh_file) - - meshes = new_mesh.split(only_watertight=False) - new_mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] - - new_mesh.export(new_mesh_file) - - -if __name__ == "__main__": - - scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122] - - mask_kernel_size = 11 - - imgs_idx = [0, 1, 2] - # imgs_idx = [42, 43, 
44] - # imgs_idx = [1, 8, 9] - - DTU_DIR = "/home/xiaoxiao/dataset/DTU_IDR/DTU" - # DTU_DIR = "/userhome/cs/xxlong/dataset/DTU_IDR/DTU" - - base_path = "/home/xiaoxiao/Workplace/nerf_reconstruction/Volume_NeuS/neus_camsys/exp/dtu/evaluation_23_24_33_new/volsdf" - - for scan in scans: - print("processing scan%d" % scan) - dir_path = os.path.join(base_path, "scan%d" % scan) - - old_mesh_file = glob(os.path.join(dir_path, "*.ply"))[0] - - clean_mesh_file = os.path.join(dir_path, "clean_%03d.ply" % scan) - final_mesh_file = os.path.join(dir_path, "final_%03d.ply" % scan) - - clean_mesh_faces_by_mask(old_mesh_file, clean_mesh_file, scan, imgs_idx, minimal_vis=1, - mask_dilated_size=mask_kernel_size) - clean_mesh_faces_outside_frustum(clean_mesh_file, final_mesh_file, imgs_idx, mask_dilated_size=mask_kernel_size) - - print("finish processing scan%d" % scan) diff --git a/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py b/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py deleted file mode 100644 index a60230705ab3f8c7c2a0ed64a20634c7ab4d2eea..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/evaluation/eval_dtu_python.py +++ /dev/null @@ -1,369 +0,0 @@ -import numpy as np -import open3d as o3d -import sklearn.neighbors as skln -from tqdm import tqdm -from scipy.io import loadmat -import multiprocessing as mp -import argparse, os, sys -import cv2 as cv - -from pathlib import Path - - -def get_path_components(path): - path = Path(path) - ppath = str(path.parent) - stem = str(path.stem) - ext = str(path.suffix) - return ppath, stem, ext - - -def sample_single_tri(input_): - n1, n2, v1, v2, tri_vert = input_ - c = np.mgrid[:n1 + 1, :n2 + 1] - c += 0.5 - c[0] /= max(n1, 1e-7) - c[1] /= max(n2, 1e-7) - c = np.transpose(c, (1, 2, 0)) - k = c[c.sum(axis=-1) < 1] # m2 - q = v1 * k[:, :1] + v2 * k[:, 1:] + tri_vert - return q - - -def write_vis_pcd(file, points, colors): - pcd = o3d.geometry.PointCloud() - pcd.points = o3d.utility.Vector3dVector(points) - pcd.colors = o3d.utility.Vector3dVector(colors) - o3d.io.write_point_cloud(file, pcd) - - -def eval_cloud(args, num_cpu_cores=-1): - mp.freeze_support() - os.makedirs(args.vis_out_dir, exist_ok=True) - - thresh = args.downsample_density - if args.mode == 'mesh': - pbar = tqdm(total=9) - pbar.set_description('read data mesh') - data_mesh = o3d.io.read_triangle_mesh(args.data) - - vertices = np.asarray(data_mesh.vertices) - triangles = np.asarray(data_mesh.triangles) - tri_vert = vertices[triangles] - - pbar.update(1) - pbar.set_description('sample pcd from mesh') - v1 = tri_vert[:, 1] - tri_vert[:, 0] - v2 = tri_vert[:, 2] - tri_vert[:, 0] - l1 = np.linalg.norm(v1, axis=-1, keepdims=True) - l2 = np.linalg.norm(v2, axis=-1, keepdims=True) - area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True) - non_zero_area = (area2 > 0)[:, 0] - l1, l2, area2, v1, v2, tri_vert = [ - arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert] - ] - thr = thresh * np.sqrt(l1 * l2 / area2) - n1 = np.floor(l1 / thr) - n2 = np.floor(l2 / thr) - - with mp.Pool() as mp_pool: - new_pts = mp_pool.map(sample_single_tri, - ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in - range(len(n1))), chunksize=1024) - - new_pts = np.concatenate(new_pts, axis=0) - data_pcd = np.concatenate([vertices, new_pts], axis=0) - - elif args.mode == 'pcd': - pbar = tqdm(total=8) - pbar.set_description('read data pcd') - data_pcd_o3d = o3d.io.read_point_cloud(args.data) - data_pcd = np.asarray(data_pcd_o3d.points) - - pbar.update(1) - 
pbar.set_description('random shuffle pcd index') - shuffle_rng = np.random.default_rng() - shuffle_rng.shuffle(data_pcd, axis=0) - - pbar.update(1) - pbar.set_description('downsample pcd') - nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=num_cpu_cores) - nn_engine.fit(data_pcd) - rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False) - mask = np.ones(data_pcd.shape[0], dtype=np.bool_) - for curr, idxs in enumerate(rnn_idxs): - if mask[curr]: - mask[idxs] = 0 - mask[curr] = 1 - data_down = data_pcd[mask] - - pbar.update(1) - pbar.set_description('masking data pcd') - obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat') - ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']] - BB = BB.astype(np.float32) - - patch = args.patch_size - inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3 - data_in = data_down[inbound] - - data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32) - grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3 - data_grid_in = data_grid[grid_inbound] - in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_) - data_in_obs = data_in[grid_inbound][in_obs] - - pbar.update(1) - pbar.set_description('read STL pcd') - stl_pcd = o3d.io.read_point_cloud(args.gt) - stl = np.asarray(stl_pcd.points) - - pbar.update(1) - pbar.set_description('compute data2stl') - nn_engine.fit(stl) - dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True) - max_dist = args.max_dist - mean_d2s = dist_d2s[dist_d2s < max_dist].mean() - - pbar.update(1) - pbar.set_description('compute stl2data') - ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P'] - - stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1) - above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0 - stl_above = stl[above] - - nn_engine.fit(data_in) - dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True) - mean_s2d = dist_s2d[dist_s2d < max_dist].mean() - - pbar.update(1) - pbar.set_description('visualize error') - vis_dist = args.visualize_threshold - R = np.array([[1, 0, 0]], dtype=np.float64) - G = np.array([[0, 1, 0]], dtype=np.float64) - B = np.array([[0, 0, 1]], dtype=np.float64) - W = np.array([[1, 1, 1]], dtype=np.float64) - data_color = np.tile(B, (data_down.shape[0], 1)) - data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist - data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha) - data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color) - stl_color = np.tile(B, (stl.shape[0], 1)) - stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist - stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha) - stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color) - - pbar.update(1) - pbar.set_description('done') - pbar.close() - over_all = (mean_d2s + mean_s2d) / 2 - print(f'ean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d} over_all: {over_all}; .') - - pparent, stem, ext = get_path_components(args.data) - if args.log is None: - path_log = os.path.join(pparent, 'eval_result.txt') - else: - path_log = args.log - with open(path_log, 
'a+') as fLog: - fLog.write(f'mean_d2gt {np.round(mean_d2s, 3)} ' - f'mean_gt2d {np.round(mean_s2d, 3)} ' - f'Over_all {np.round(over_all, 3)} ' - f'[{stem}] \n') - - return over_all, mean_d2s, mean_s2d - - -if __name__ == '__main__': - from glob import glob - - mp.freeze_support() - - parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default='data_in.ply') - parser.add_argument('--gt', type=str, help='ground truth') - parser.add_argument('--scan', type=int, default=1) - parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd']) - parser.add_argument('--dataset_dir', type=str, default='/dataset/dtu_official/SampleSet/MVS_Data') - parser.add_argument('--vis_out_dir', type=str, default='.') - parser.add_argument('--downsample_density', type=float, default=0.2) - parser.add_argument('--patch_size', type=float, default=60) - parser.add_argument('--max_dist', type=float, default=20) - parser.add_argument('--visualize_threshold', type=float, default=10) - parser.add_argument('--log', type=str, default=None) - args = parser.parse_args() - - base_dir = "./exp" - - GT_DIR = "./gt_pcd" - - scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122] - - for scan in scans: - - print("processing scan%d" % scan) - - args.data = os.path.join(base_dir, "scan{}".format(scan), "final_%03d.ply" % scan) - - if not os.path.exists(args.data): - continue - - args.gt = os.path.join(GT_DIR, "stl%03d_total.ply" % scan) - args.vis_out_dir = os.path.join(base_dir, "scan{}".format(scan)) - args.scan = scan - os.makedirs(args.vis_out_dir, exist_ok=True) - - dist_thred1 = 1 - dist_thred2 = 2 - - thresh = args.downsample_density - - if args.mode == 'mesh': - pbar = tqdm(total=9) - pbar.set_description('read data mesh') - data_mesh = o3d.io.read_triangle_mesh(args.data) - - vertices = np.asarray(data_mesh.vertices) - triangles = np.asarray(data_mesh.triangles) - tri_vert = vertices[triangles] - - pbar.update(1) - pbar.set_description('sample pcd from mesh') - v1 = tri_vert[:, 1] - tri_vert[:, 0] - v2 = tri_vert[:, 2] - tri_vert[:, 0] - l1 = np.linalg.norm(v1, axis=-1, keepdims=True) - l2 = np.linalg.norm(v2, axis=-1, keepdims=True) - area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True) - non_zero_area = (area2 > 0)[:, 0] - l1, l2, area2, v1, v2, tri_vert = [ - arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert] - ] - thr = thresh * np.sqrt(l1 * l2 / area2) - n1 = np.floor(l1 / thr) - n2 = np.floor(l2 / thr) - - with mp.Pool() as mp_pool: - new_pts = mp_pool.map(sample_single_tri, - ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in - range(len(n1))), chunksize=1024) - - new_pts = np.concatenate(new_pts, axis=0) - data_pcd = np.concatenate([vertices, new_pts], axis=0) - - elif args.mode == 'pcd': - pbar = tqdm(total=8) - pbar.set_description('read data pcd') - data_pcd_o3d = o3d.io.read_point_cloud(args.data) - data_pcd = np.asarray(data_pcd_o3d.points) - - pbar.update(1) - pbar.set_description('random shuffle pcd index') - shuffle_rng = np.random.default_rng() - shuffle_rng.shuffle(data_pcd, axis=0) - - pbar.update(1) - pbar.set_description('downsample pcd') - nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1) - nn_engine.fit(data_pcd) - rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False) - mask = np.ones(data_pcd.shape[0], dtype=np.bool_) - for curr, idxs in enumerate(rnn_idxs): - if mask[curr]: - mask[idxs] = 0 - mask[curr] = 1 
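-        # The masking loop above is a greedy radius-based downsampling: each point that is
-        # still unmasked suppresses all of its neighbours within `thresh`, so the surviving
-        # points are roughly `thresh`-separated before the chamfer distances are computed.
-        # An equivalent standalone sketch (helper name and toy input are illustrative only):
-        #
-        #     def greedy_downsample(points, radius):
-        #         nn = skln.NearestNeighbors(radius=radius, algorithm='kd_tree').fit(points)
-        #         neighbours = nn.radius_neighbors(points, radius=radius, return_distance=False)
-        #         keep = np.ones(len(points), dtype=np.bool_)
-        #         for i, idxs in enumerate(neighbours):
-        #             if keep[i]:
-        #                 keep[idxs] = False
-        #                 keep[i] = True
-        #         return points[keep]
-        #
-        #     downsampled = greedy_downsample(np.random.rand(1000, 3), radius=0.2)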
- data_down = data_pcd[mask] - - pbar.update(1) - pbar.set_description('masking data pcd') - obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat') - ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']] - BB = BB.astype(np.float32) - - patch = args.patch_size - inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3 - data_in = data_down[inbound] - - data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32) - grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3 - data_grid_in = data_grid[grid_inbound] - in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_) - data_in_obs = data_in[grid_inbound][in_obs] - - pbar.update(1) - pbar.set_description('read STL pcd') - stl_pcd = o3d.io.read_point_cloud(args.gt) - stl = np.asarray(stl_pcd.points) - - pbar.update(1) - pbar.set_description('compute data2stl') - nn_engine.fit(stl) - dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True) - max_dist = args.max_dist - mean_d2s = dist_d2s[dist_d2s < max_dist].mean() - - precision_1 = len(dist_d2s[dist_d2s < dist_thred1]) / len(dist_d2s) - precision_2 = len(dist_d2s[dist_d2s < dist_thred2]) / len(dist_d2s) - - pbar.update(1) - pbar.set_description('compute stl2data') - ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P'] - - stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1) - above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0 - - stl_above = stl[above] - - nn_engine.fit(data_in) - dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True) - mean_s2d = dist_s2d[dist_s2d < max_dist].mean() - - recall_1 = len(dist_s2d[dist_s2d < dist_thred1]) / len(dist_s2d) - recall_2 = len(dist_s2d[dist_s2d < dist_thred2]) / len(dist_s2d) - - pbar.update(1) - pbar.set_description('visualize error') - vis_dist = args.visualize_threshold - R = np.array([[1, 0, 0]], dtype=np.float64) - G = np.array([[0, 1, 0]], dtype=np.float64) - B = np.array([[0, 0, 1]], dtype=np.float64) - W = np.array([[1, 1, 1]], dtype=np.float64) - data_color = np.tile(B, (data_down.shape[0], 1)) - data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist - data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha) - data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color) - stl_color = np.tile(B, (stl.shape[0], 1)) - stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist - stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha) - stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G - write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color) - - pbar.update(1) - pbar.set_description('done') - pbar.close() - over_all = (mean_d2s + mean_s2d) / 2 - - fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6) - fscore_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2 + 1e-6) - - print(f'over_all: {over_all}; mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}.') - print(f'precision_1mm: {precision_1}; recall_1mm: {recall_1}; fscore_1mm: {fscore_1}') - print(f'precision_2mm: {precision_2}; recall_2mm: {recall_2}; fscore_2mm: {fscore_2}') - - pparent, stem, ext = get_path_components(args.data) - if args.log is None: - path_log = os.path.join(pparent, 
'eval_result.txt') - else: - path_log = args.log - with open(path_log, 'w+') as fLog: - fLog.write(f'over_all {np.round(over_all, 3)} ' - f'mean_d2gt {np.round(mean_d2s, 3)} ' - f'mean_gt2d {np.round(mean_s2d, 3)} \n' - f'precision_1mm {np.round(precision_1, 3)} ' - f'recall_1mm {np.round(recall_1, 3)} ' - f'fscore_1mm {np.round(fscore_1, 3)} \n' - f'precision_2mm {np.round(precision_2, 3)} ' - f'recall_2mm {np.round(recall_2, 3)} ' - f'fscore_2mm {np.round(fscore_2, 3)} \n' - f'[{stem}] \n') diff --git a/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py b/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py index 8d2d26c5339e7fc057160239aeb0822542f910f7..7d09a56f3d66935ca26b2690ed637dfb6f51049c 100644 --- a/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py +++ b/SparseNeuS_demo_v1/exp_runner_generic_blender_val.py @@ -1,15 +1,12 @@ -import torch -import torch.nn.functional as F -from torch.utils.data import DataLoader -import argparse import os import logging +import argparse import numpy as np -import cv2 as cv -import trimesh from shutil import copyfile +import torch +from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter -from icecream import ic +from rich import print from tqdm import tqdm from pyhocon import ConfigFactory @@ -17,23 +14,12 @@ import sys sys.path.append(os.path.dirname(__file__)) from models.fields import SingleVarianceNetwork - from models.featurenet import FeatureNet - from models.trainer_generic import GenericTrainer - from models.sparse_sdf_network import SparseSdfNetwork - from models.rendering_network import GeneralRenderingNetwork - -from datetime import datetime - -from data.dtu_general import MVSDatasetDtuPerView - -from utils.training_utils import tocuda from data.blender_general_narrow_all_eval_new_data import BlenderPerView -from termcolor import colored from datetime import datetime @@ -45,14 +31,14 @@ class Runner: self.device = torch.device('cuda:%d' % local_rank) # self.device = torch.device('cuda') self.num_devices = torch.cuda.device_count() - self.is_continue = is_continue + self.is_continue = is_continue or (mode == "export_mesh") self.is_restore = is_restore self.restore_lod0 = restore_lod0 self.mode = mode self.model_list = [] self.logger = logging.getLogger('exp_logger') - print(colored("detected %d GPUs" % self.num_devices, "red")) + print("detected %d GPUs" % self.num_devices) self.conf_path = conf_path self.conf = ConfigFactory.parse_file(conf_path) @@ -63,7 +49,7 @@ class Runner: else: self.base_exp_dir = self.conf['general.base_exp_dir'] self.conf['general.base_exp_dir'] = self.base_exp_dir # jha use this when testing - print(colored("base_exp_dir: " + self.base_exp_dir, 'yellow')) + print("base_exp_dir: " + self.base_exp_dir) os.makedirs(self.base_exp_dir, exist_ok=True) self.iter_step = 0 self.val_step = 0 @@ -121,12 +107,11 @@ class Runner: **self.conf['model.rendering_network_lod1']).to(self.device) if self.mode == 'export_mesh' or self.mode == 'val': # base_exp_dir_to_store = os.path.join(self.base_exp_dir, '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.now())) - print("save mesh to:", os.path.join("../", args.specific_dataset_name)) base_exp_dir_to_store = os.path.join("../", args.specific_dataset_name) #"../gradio_tmp" # MODIFIED else: base_exp_dir_to_store = self.base_exp_dir - print(colored(f"Store in: {base_exp_dir_to_store}", "blue")) + print(f"Store in: {base_exp_dir_to_store}") # Renderer model self.trainer = GenericTrainer( self.rendering_network_outside, @@ -149,7 +134,7 @@ class Runner: # Load 
checkpoint latest_model_name = None - if is_continue: + if self.is_continue: model_list_raw = os.listdir(os.path.join(self.base_exp_dir, 'checkpoints')) model_list = [] for model_name in model_list_raw: @@ -238,7 +223,7 @@ class Runner: epochs = int(1 + res_step // len(dataloader)) self.adjust_learning_rate() - print(colored("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr']), "yellow")) + print("starting training learning rate: {:.5f}".format(self.optimizer.param_groups[0]['lr'])) background_rgb = None if self.use_white_bkgd: @@ -247,7 +232,7 @@ class Runner: for epoch_i in range(epochs): - print(colored("current epoch %d" % epoch_i, 'red')) + print("current epoch %d" % epoch_i) dataloader = tqdm(dataloader) for batch in dataloader: @@ -365,8 +350,8 @@ class Runner: losses_lod1['color_fine_loss'].mean() if losses_lod1 is not None else 0, self.optimizer.param_groups[0]['lr'])) - print(colored('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format( - alpha_inter_ratio_lod0, alpha_inter_ratio_lod1), 'green')) + print('alpha_inter_ratio_lod0 = {:.4f} alpha_inter_ratio_lod1 = {:.4f}\n'.format( + alpha_inter_ratio_lod0, alpha_inter_ratio_lod1)) if losses_lod0 is not None: # print("[TEST]: weights_sum in print", losses_lod0['weights_sum'].mean()) @@ -469,7 +454,7 @@ class Runner: # 3. load the new state dict network.load_state_dict(pretrained_dict) except: - print(colored(comment + " load fails", 'yellow')) + print(comment + " load fails") checkpoint = torch.load(os.path.join(self.base_exp_dir, 'checkpoints', checkpoint_name), map_location=self.device) @@ -497,7 +482,7 @@ class Runner: try: self.optimizer.load_state_dict(checkpoint['optimizer']) except: - print(colored("load optimizer fails", "yellow")) + print("load optimizer fails") self.iter_step = checkpoint['iter_step'] self.val_step = checkpoint['val_step'] if 'val_step' in checkpoint.keys() else 0 @@ -532,15 +517,10 @@ class Runner: torch.save(checkpoint, os.path.join(self.base_exp_dir, 'checkpoints', 'ckpt_{:0>6d}.pth'.format(self.iter_step))) - def validate(self, idx=-1, resolution_level=-1): + def validate(self, resolution_level=-1): # validate image - - ic(self.iter_step, idx) + print("iter_step: ", self.iter_step) self.logger.info('Validate begin') - - if idx < 0: - idx = self.val_step - # idx = np.random.randint(len(self.val_dataset)) self.val_step += 1 try: @@ -576,16 +556,9 @@ class Runner: ) - def export_mesh(self, idx=-1, resolution_level=-1): - # validate image - - ic(self.iter_step, idx) + def export_mesh(self, resolution_level=-1): + print("iter_step: ", self.iter_step) self.logger.info('Validate begin') - import time - start1 = time.time() - if idx < 0: - idx = self.val_step - # idx = np.random.randint(len(self.val_dataset)) self.val_step += 1 try: @@ -598,7 +571,6 @@ class Runner: background_rgb = None if self.use_white_bkgd: - # background_rgb = torch.ones([1, 3]).to(self.device) background_rgb = 1.0 batch['batch_idx'] = torch.tensor([x for x in range(self.batch_size)]) @@ -609,8 +581,6 @@ class Runner: else: alpha_inter_ratio_lod0 = 1. 
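             # alpha_inter_ratio_* is the cos-annealing weight used by the renderer: 0.0 favours the
             # relaxed approximation of the ray/normal cosine, 1.0 uses the true (clamped) cosine.
             # In this branch lod0 is pinned to 1.0, while lod1 is still read from its schedule below.
             # A plausible ramp, assuming a NeuS-style linear schedule (illustrative only):
             #     ratio = min(1.0, max(0.0, (iter_step - anneal_start) / (anneal_end - anneal_start)))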
alpha_inter_ratio_lod1 = self.get_alpha_inter_ratio(self.anneal_start_lod1, self.anneal_end_lod1) - end1 = time.time() - print("time for getting data", end1 - start1) self.trainer( batch, background_rgb=background_rgb, diff --git a/SparseNeuS_demo_v1/models/patch_projector.py b/SparseNeuS_demo_v1/models/patch_projector.py index cf9ca424c588e49d754988814233069b2cf127fa..24bb64527a1f9a9a1c6db8cd290d38f65b63b6d4 100644 --- a/SparseNeuS_demo_v1/models/patch_projector.py +++ b/SparseNeuS_demo_v1/models/patch_projector.py @@ -208,4 +208,4 @@ def normalize(flow, h, w, clamp=None): def build_patch_offset(h_patch_size): offsets = torch.arange(-h_patch_size, h_patch_size + 1) - return torch.stack(torch.meshgrid(offsets, offsets)[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2 + return torch.stack(torch.meshgrid(offsets, offsets, indexing="ij")[::-1], dim=-1).view(1, -1, 2) # nb_pixels_patch * 2 diff --git a/SparseNeuS_demo_v1/models/rays.py b/SparseNeuS_demo_v1/models/rays.py index a31df93e727fd79adaaa3e934c67378b611d4ee0..aa45b18df32adc34124687fb06495c1652cb1678 100644 --- a/SparseNeuS_demo_v1/models/rays.py +++ b/SparseNeuS_demo_v1/models/rays.py @@ -1,12 +1,7 @@ -import os, torch, cv2, re +import os, torch import numpy as np -from PIL import Image import torch.nn.functional as F -import torchvision.transforms as T - -from random import random - def build_patch_offset(h_patch_size): offsets = torch.arange(-h_patch_size, h_patch_size + 1) @@ -24,7 +19,7 @@ def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=Non """ device = image.device ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 # normalized ndc uv coordinates, (-1, 1) @@ -86,7 +81,7 @@ def gen_random_rays_from_single_image(H, W, N_rays, image, intrinsic, c2w, depth pixels_y_1 = torch.randint(low=0, high=H, size=[N_rays // 4]) ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys], dim=-1) # H, W, 2 try: @@ -292,7 +287,7 @@ def gen_rays_between(c2w_0, c2w_1, intrinsic, ratio, H, W, resolution_level=1): l = resolution_level tx = torch.linspace(0, W - 1, W // l) ty = torch.linspace(0, H - 1, H // l) - pixels_x, pixels_y = torch.meshgrid(tx, ty) + pixels_x, pixels_y = torch.meshgrid(tx, ty, indexing="ij") p = torch.stack([pixels_x, pixels_y, torch.ones_like(pixels_y)], dim=-1).to(device) # W, H, 3 intrinsic_inv = torch.inverse(intrinsic[:3, :3]) diff --git a/SparseNeuS_demo_v1/models/render_utils.py b/SparseNeuS_demo_v1/models/render_utils.py index e1d3d8fc4ca7bf5e306733a213dec96a517a71c7..c14d5761234a16a19ed10509f9f0972adaf04c9a 100644 --- a/SparseNeuS_demo_v1/models/render_utils.py +++ b/SparseNeuS_demo_v1/models/render_utils.py @@ -1,14 +1,8 @@ import torch import torch.nn as nn import torch.nn.functional as F -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic from ops.back_project import cam2pixel -import pdb def sample_pdf(bins, weights, n_samples, det=False): diff --git a/SparseNeuS_demo_v1/models/sparse_neus_renderer.py b/SparseNeuS_demo_v1/models/sparse_neus_renderer.py index 8015669f349f5b61ca1cb234ec2fcdf71cd10407..96ffc7b547e0f83a177a81f36be38375d9cd26fb 100644 --- 
a/SparseNeuS_demo_v1/models/sparse_neus_renderer.py +++ b/SparseNeuS_demo_v1/models/sparse_neus_renderer.py @@ -10,7 +10,6 @@ import torch.nn.functional as F import numpy as np import logging import mcubes -import trimesh from icecream import ic from models.render_utils import sample_pdf @@ -21,10 +20,6 @@ from models.fast_renderer import FastRenderer from models.patch_projector import PatchProjector -from models.rays import gen_rays_between - -import pdb - class SparseNeuSRenderer(nn.Module): """ @@ -898,7 +893,7 @@ class SparseNeuSRenderer(nn.Module): for xi, xs in enumerate(X): for yi, ys in enumerate(Y): for zi, zs in enumerate(Z): - xx, yy, zz = torch.meshgrid(xs, ys, zs) + xx, yy, zz = torch.meshgrid(xs, ys, zs, indexing="ij") pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1) # ! attention, the query function is different for extract geometry and fields @@ -958,7 +953,7 @@ class SparseNeuSRenderer(nn.Module): with torch.no_grad(): ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' + torch.linspace(0, W - 1, W), indexing="ij") # pytorch's meshgrid has indexing='ij' p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 intrinsics_inv = torch.inverse(intrinsics) diff --git a/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py b/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py deleted file mode 100644 index 34e22aa312312b4fc7e8225e15f1eea5a2de71d1..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/sparse_neus_renderer_normals_new.py +++ /dev/null @@ -1,992 +0,0 @@ -""" -The codes are heavily borrowed from NeuS -""" - -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic -from models.render_utils import sample_pdf - -from models.projector import Projector -from tsparse.torchsparse_utils import sparse_to_dense_channel - -from models.fast_renderer import FastRenderer - -from models.patch_projector import PatchProjector - -from models.rays import gen_rays_between - -import pdb - - -class SparseNeuSRenderer(nn.Module): - """ - conditional neus render; - optimize on normalized world space; - warped by nn.Module to support DataParallel traning - """ - - def __init__(self, - rendering_network_outside, - sdf_network, - variance_network, - rendering_network, - n_samples, - n_importance, - n_outside, - perturb, - alpha_type='div', - conf=None - ): - super(SparseNeuSRenderer, self).__init__() - - self.conf = conf - self.base_exp_dir = conf['general.base_exp_dir'] - - # network setups - self.rendering_network_outside = rendering_network_outside - self.sdf_network = sdf_network - self.variance_network = variance_network - self.rendering_network = rendering_network - - self.n_samples = n_samples - self.n_importance = n_importance - self.n_outside = n_outside - self.perturb = perturb - self.alpha_type = alpha_type - - self.rendering_projector = Projector() # used to obtain features for generalized rendering - - self.h_patch_size = self.conf.get_int('model.h_patch_size', default=3) - self.patch_projector = PatchProjector(self.h_patch_size) - - self.ray_tracer = FastRenderer() # ray_tracer to extract depth maps from sdf_volume - - # - fitted rendering or general rendering - try: - self.if_fitted_rendering = self.sdf_network.if_fitted_rendering - except: - self.if_fitted_rendering = False - - def up_sample(self, 
rays_o, rays_d, z_vals, sdf, n_importance, inv_variance, - conditional_valid_mask_volume=None): - device = rays_o.device - batch_size, n_samples = z_vals.shape - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] # n_rays, n_samples, 3 - - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(batch_size, n_samples) - pts_mask = pts_mask[:, :-1] * pts_mask[:, 1:] # [batch_size, n_samples-1] - else: - pts_mask = torch.ones([batch_size, n_samples]).to(pts.device) - - sdf = sdf.reshape(batch_size, n_samples) - prev_sdf, next_sdf = sdf[:, :-1], sdf[:, 1:] - prev_z_vals, next_z_vals = z_vals[:, :-1], z_vals[:, 1:] - mid_sdf = (prev_sdf + next_sdf) * 0.5 - dot_val = None - if self.alpha_type == 'uniform': - dot_val = torch.ones([batch_size, n_samples - 1]) * -1.0 - else: - dot_val = (next_sdf - prev_sdf) / (next_z_vals - prev_z_vals + 1e-5) - prev_dot_val = torch.cat([torch.zeros([batch_size, 1]).to(device), dot_val[:, :-1]], dim=-1) - dot_val = torch.stack([prev_dot_val, dot_val], dim=-1) - dot_val, _ = torch.min(dot_val, dim=-1, keepdim=False) - dot_val = dot_val.clip(-10.0, 0.0) * pts_mask - dist = (next_z_vals - prev_z_vals) - prev_esti_sdf = mid_sdf - dot_val * dist * 0.5 - next_esti_sdf = mid_sdf + dot_val * dist * 0.5 - prev_cdf = torch.sigmoid(prev_esti_sdf * inv_variance) - next_cdf = torch.sigmoid(next_esti_sdf * inv_variance) - alpha_sdf = (prev_cdf - next_cdf + 1e-5) / (prev_cdf + 1e-5) - - alpha = alpha_sdf - - # - apply pts_mask - alpha = pts_mask * alpha - - weights = alpha * torch.cumprod( - torch.cat([torch.ones([batch_size, 1]).to(device), 1. - alpha + 1e-7], -1), -1)[:, :-1] - - z_samples = sample_pdf(z_vals, weights, n_importance, det=True).detach() - return z_samples - - def cat_z_vals(self, rays_o, rays_d, z_vals, new_z_vals, sdf, lod, - sdf_network, gru_fusion, - # * related to conditional feature - conditional_volume=None, - conditional_valid_mask_volume=None - ): - device = rays_o.device - batch_size, n_samples = z_vals.shape - _, n_importance = new_z_vals.shape - pts = rays_o[:, None, :] + rays_d[:, None, :] * new_z_vals[..., :, None] - - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(batch_size, n_importance) - pts_mask_bool = (pts_mask > 0).view(-1) - else: - pts_mask = torch.ones([batch_size, n_importance]).to(pts.device) - - new_sdf = torch.ones([batch_size * n_importance, 1]).to(pts.dtype).to(device) * 100 - - if torch.sum(pts_mask) > 1: - new_outputs = sdf_network.sdf(pts.reshape(-1, 3)[pts_mask_bool], conditional_volume, lod=lod) - new_sdf[pts_mask_bool] = new_outputs['sdf_pts_scale%d' % lod] # .reshape(batch_size, n_importance) - - new_sdf = new_sdf.view(batch_size, n_importance) - - z_vals = torch.cat([z_vals, new_z_vals], dim=-1) - sdf = torch.cat([sdf, new_sdf], dim=-1) - - z_vals, index = torch.sort(z_vals, dim=-1) - xx = torch.arange(batch_size)[:, None].expand(batch_size, n_samples + n_importance).reshape(-1) - index = index.reshape(-1) - sdf = sdf[(xx, index)].reshape(batch_size, n_samples + n_importance) - - return z_vals, sdf - - @torch.no_grad() - def get_pts_mask_for_conditional_volume(self, pts, mask_volume): - """ - - :param pts: [N, 3] - :param mask_volume: [1, 1, X, Y, Z] - :return: - """ - num_pts = pts.shape[0] - pts = pts.view(1, 1, 1, num_pts, 3) # - should be in range (-1, 1) 
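-        # F.grid_sample on a 5-D volume [N, C, D, H, W] expects the sampling grid's last
-        # dimension ordered as (x, y, z), where x indexes W (the last spatial axis) and z
-        # indexes D (the first). The points are stored as (X, Y, Z) in the same order as the
-        # volume's spatial dims, so the flip below converts a point (px, py, pz) into the
-        # (pz, py, px) layout that grid_sample expects; mode='nearest' then looks up the
-        # nearest voxel's mask value without interpolation.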
- - pts = torch.flip(pts, dims=[-1]) - - pts_mask = F.grid_sample(mask_volume, pts, mode='nearest') # [1, c, 1, 1, num_pts] - pts_mask = pts_mask.view(-1, num_pts).permute(1, 0).contiguous() # [num_pts, 1] - - return pts_mask - - def render_core(self, - rays_o, - rays_d, - z_vals, - sample_dist, - lod, - sdf_network, - rendering_network, - background_alpha=None, # - no use here - background_sampled_color=None, # - no use here - background_rgb=None, # - no use here - alpha_inter_ratio=0.0, - # * related to conditional feature - conditional_volume=None, - conditional_valid_mask_volume=None, - # * 2d feature maps - feature_maps=None, - color_maps=None, - w2cs=None, - intrinsics=None, - img_wh=None, - query_c2w=None, # - used for testing - if_general_rendering=True, - if_render_with_grad=True, - # * used for blending mlp rendering network - img_index=None, - rays_uv=None, - # * used for clear bg and fg - bg_num=0 - ): - device = rays_o.device - N_rays = rays_o.shape[0] - _, n_samples = z_vals.shape - dists = z_vals[..., 1:] - z_vals[..., :-1] - dists = torch.cat([dists, torch.Tensor([sample_dist]).expand(dists[..., :1].shape).to(device)], -1) - - mid_z_vals = z_vals + dists * 0.5 - mid_dists = mid_z_vals[..., 1:] - mid_z_vals[..., :-1] - - pts = rays_o[:, None, :] + rays_d[:, None, :] * mid_z_vals[..., :, None] # n_rays, n_samples, 3 - dirs = rays_d[:, None, :].expand(pts.shape) - - pts = pts.reshape(-1, 3) - dirs = dirs.reshape(-1, 3) - - # * if conditional_volume is restored from sparse volume, need mask for pts - if conditional_valid_mask_volume is not None: - pts_mask = self.get_pts_mask_for_conditional_volume(pts, conditional_valid_mask_volume) - pts_mask = pts_mask.reshape(N_rays, n_samples).float().detach() - pts_mask_bool = (pts_mask > 0).view(-1) - - if torch.sum(pts_mask_bool.float()) < 1: # ! 
when render out image, may meet this problem - pts_mask_bool[:100] = True - - else: - pts_mask = torch.ones([N_rays, n_samples]).to(pts.device) - # import ipdb; ipdb.set_trace() - # pts_valid = pts[pts_mask_bool] - sdf_nn_output = sdf_network.sdf(pts[pts_mask_bool], conditional_volume, lod=lod) - - sdf = torch.ones([N_rays * n_samples, 1]).to(pts.dtype).to(device) * 100 - sdf[pts_mask_bool] = sdf_nn_output['sdf_pts_scale%d' % lod] # [N_rays*n_samples, 1] - feature_vector_valid = sdf_nn_output['sdf_features_pts_scale%d' % lod] - feature_vector = torch.zeros([N_rays * n_samples, feature_vector_valid.shape[1]]).to(pts.dtype).to(device) - feature_vector[pts_mask_bool] = feature_vector_valid - - # * estimate alpha from sdf - gradients = torch.zeros([N_rays * n_samples, 3]).to(pts.dtype).to(device) - # import ipdb; ipdb.set_trace() - gradients[pts_mask_bool] = sdf_network.gradient( - pts[pts_mask_bool], conditional_volume, lod=lod).squeeze() - - sampled_color_mlp = None - rendering_valid_mask_mlp = None - sampled_color_patch = None - rendering_patch_mask = None - - if self.if_fitted_rendering: # used for fine-tuning - position_latent = sdf_nn_output['sampled_latent_scale%d' % lod] - sampled_color_mlp = torch.zeros([N_rays * n_samples, 3]).to(pts.dtype).to(device) - sampled_color_mlp_mask = torch.zeros([N_rays * n_samples, 1]).to(pts.dtype).to(device) - - # - extract pixel - pts_pixel_color, pts_pixel_mask = self.patch_projector.pixel_warp( - pts[pts_mask_bool][:, None, :], color_maps, intrinsics, - w2cs, img_wh=None) # [N_rays * n_samples,1, N_views, 3] , [N_rays*n_samples, 1, N_views] - pts_pixel_color = pts_pixel_color[:, 0, :, :] # [N_rays * n_samples, N_views, 3] - pts_pixel_mask = pts_pixel_mask[:, 0, :] # [N_rays*n_samples, N_views] - - # - extract patch - if_patch_blending = False if rays_uv is None else True - pts_patch_color, pts_patch_mask = None, None - if if_patch_blending: - pts_patch_color, pts_patch_mask = self.patch_projector.patch_warp( - pts.reshape([N_rays, n_samples, 3]), - rays_uv, gradients.reshape([N_rays, n_samples, 3]), - color_maps, - intrinsics[0], intrinsics, - query_c2w[0], torch.inverse(w2cs), img_wh=None - ) # (N_rays, n_samples, N_src, Npx, 3), (N_rays, n_samples, N_src, Npx) - N_src, Npx = pts_patch_mask.shape[2:] - pts_patch_color = pts_patch_color.view(N_rays * n_samples, N_src, Npx, 3)[pts_mask_bool] - pts_patch_mask = pts_patch_mask.view(N_rays * n_samples, N_src, Npx)[pts_mask_bool] - - sampled_color_patch = torch.zeros([N_rays * n_samples, Npx, 3]).to(device) - sampled_color_patch_mask = torch.zeros([N_rays * n_samples, 1]).to(device) - - sampled_color_mlp_, sampled_color_mlp_mask_, \ - sampled_color_patch_, sampled_color_patch_mask_ = sdf_network.color_blend( - pts[pts_mask_bool], - position_latent, - gradients[pts_mask_bool], - dirs[pts_mask_bool], - feature_vector[pts_mask_bool], - img_index=img_index, - pts_pixel_color=pts_pixel_color, - pts_pixel_mask=pts_pixel_mask, - pts_patch_color=pts_patch_color, - pts_patch_mask=pts_patch_mask - - ) # [n, 3], [n, 1] - sampled_color_mlp[pts_mask_bool] = sampled_color_mlp_ - sampled_color_mlp_mask[pts_mask_bool] = sampled_color_mlp_mask_.float() - sampled_color_mlp = sampled_color_mlp.view(N_rays, n_samples, 3) - sampled_color_mlp_mask = sampled_color_mlp_mask.view(N_rays, n_samples) - rendering_valid_mask_mlp = torch.mean(pts_mask * sampled_color_mlp_mask, dim=-1, keepdim=True) > 0.5 - - # patch blending - if if_patch_blending: - sampled_color_patch[pts_mask_bool] = sampled_color_patch_ - 
sampled_color_patch_mask[pts_mask_bool] = sampled_color_patch_mask_.float() - sampled_color_patch = sampled_color_patch.view(N_rays, n_samples, Npx, 3) - sampled_color_patch_mask = sampled_color_patch_mask.view(N_rays, n_samples) - rendering_patch_mask = torch.mean(pts_mask * sampled_color_patch_mask, dim=-1, - keepdim=True) > 0.5 # [N_rays, 1] - else: - sampled_color_patch, rendering_patch_mask = None, None - - if if_general_rendering: # used for general training - # [512, 128, 16]; [4, 512, 128, 59]; [4, 512, 128, 4] - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask, _, _ = self.rendering_projector.compute_view_independent( - pts.view(N_rays, n_samples, 3), - # * 3d geometry feature volumes - geometryVolume=conditional_volume[0], - geometryVolumeMask=conditional_valid_mask_volume[0], - sdf_network=sdf_network, - lod=lod, - # * 2d rendering feature maps - rendering_feature_maps=feature_maps, # [n_views, 56, 256, 256] - color_maps=color_maps, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=img_wh, - query_img_idx=0, # the index of the N_views dim for rendering - query_c2w=query_c2w, - ) - - # (N_rays, n_samples, 3) - if if_render_with_grad: - # import ipdb; ipdb.set_trace() - # [nrays, 3] [nrays, 1] - sampled_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - # import ipdb; ipdb.set_trace() - else: - with torch.no_grad(): - sampled_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - else: - sampled_color, rendering_valid_mask = None, None - - inv_variance = self.variance_network(feature_vector)[:, :1].clip(1e-6, 1e6) - - true_dot_val = (dirs * gradients).sum(-1, keepdim=True) # * calculate - - iter_cos = -(F.relu(-true_dot_val * 0.5 + 0.5) * (1.0 - alpha_inter_ratio) + F.relu( - -true_dot_val) * alpha_inter_ratio) # always non-positive - - iter_cos = iter_cos * pts_mask.view(-1, 1) - - true_estimate_sdf_half_next = sdf + iter_cos.clip(-10.0, 10.0) * dists.reshape(-1, 1) * 0.5 - true_estimate_sdf_half_prev = sdf - iter_cos.clip(-10.0, 10.0) * dists.reshape(-1, 1) * 0.5 - - prev_cdf = torch.sigmoid(true_estimate_sdf_half_prev * inv_variance) - next_cdf = torch.sigmoid(true_estimate_sdf_half_next * inv_variance) - - p = prev_cdf - next_cdf - c = prev_cdf - - if self.alpha_type == 'div': - alpha_sdf = ((p + 1e-5) / (c + 1e-5)).reshape(N_rays, n_samples).clip(0.0, 1.0) - elif self.alpha_type == 'uniform': - uniform_estimate_sdf_half_next = sdf - dists.reshape(-1, 1) * 0.5 - uniform_estimate_sdf_half_prev = sdf + dists.reshape(-1, 1) * 0.5 - uniform_prev_cdf = torch.sigmoid(uniform_estimate_sdf_half_prev * inv_variance) - uniform_next_cdf = torch.sigmoid(uniform_estimate_sdf_half_next * inv_variance) - uniform_alpha = F.relu( - (uniform_prev_cdf - uniform_next_cdf + 1e-5) / (uniform_prev_cdf + 1e-5)).reshape( - N_rays, n_samples).clip(0.0, 1.0) - alpha_sdf = uniform_alpha - else: - assert False - - alpha = alpha_sdf - - # - apply pts_mask - alpha = alpha * pts_mask - - # pts_radius = torch.linalg.norm(pts, ord=2, dim=-1, keepdim=True).reshape(N_rays, n_samples) - # inside_sphere = (pts_radius < 1.0).float().detach() - # relax_inside_sphere = (pts_radius < 1.2).float().detach() - inside_sphere = pts_mask - relax_inside_sphere = pts_mask - - weights = alpha * torch.cumprod(torch.cat([torch.ones([N_rays, 1]).to(device), 1. 
- alpha + 1e-7], -1), -1)[:, - :-1] # n_rays, n_samples - weights_sum = weights.sum(dim=-1, keepdim=True) - alpha_sum = alpha.sum(dim=-1, keepdim=True) - - if bg_num > 0: - weights_sum_fg = weights[:, :-bg_num].sum(dim=-1, keepdim=True) - else: - weights_sum_fg = weights_sum - - if sampled_color is not None: - color = (sampled_color * weights[:, :, None]).sum(dim=1) - else: - color = None - # import ipdb; ipdb.set_trace() - - if background_rgb is not None and color is not None: - color = color + background_rgb * (1.0 - weights_sum) - # print("color device:" + str(color.device)) - # if color is not None: - # # import ipdb; ipdb.set_trace() - # color = color + (1.0 - weights_sum) - - - ###################* mlp color rendering ##################### - color_mlp = None - # import ipdb; ipdb.set_trace() - if sampled_color_mlp is not None: - color_mlp = (sampled_color_mlp * weights[:, :, None]).sum(dim=1) - - if background_rgb is not None and color_mlp is not None: - color_mlp = color_mlp + background_rgb * (1.0 - weights_sum) - - ############################ * patch blending ################ - blended_color_patch = None - if sampled_color_patch is not None: - blended_color_patch = (sampled_color_patch * weights[:, :, None, None]).sum(dim=1) # [N_rays, Npx, 3] - - ###################################################### - - gradient_error = (torch.linalg.norm(gradients.reshape(N_rays, n_samples, 3), ord=2, - dim=-1) - 1.0) ** 2 - # ! the gradient normal should be masked out, the pts out of the bounding box should also be penalized - gradient_error = (pts_mask * gradient_error).sum() / ( - (pts_mask).sum() + 1e-5) - - depth = (mid_z_vals * weights[:, :n_samples]).sum(dim=1, keepdim=True) - # print("[TEST]: weights_sum in render_core", weights_sum.mean()) - # print("[TEST]: weights_sum in render_core NAN number", weights_sum.isnan().sum()) - # if weights_sum.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - return { - 'color': color, - 'color_mask': rendering_valid_mask, # (N_rays, 1) - 'color_mlp': color_mlp, - 'color_mlp_mask': rendering_valid_mask_mlp, - 'sdf': sdf, # (N_rays, n_samples) - 'depth': depth, # (N_rays, 1) - 'dists': dists, - 'gradients': gradients.reshape(N_rays, n_samples, 3), - 'variance': 1.0 / inv_variance, - 'mid_z_vals': mid_z_vals, - 'weights': weights, - 'weights_sum': weights_sum, - 'alpha_sum': alpha_sum, - 'alpha_mean': alpha.mean(), - 'cdf': c.reshape(N_rays, n_samples), - 'gradient_error': gradient_error, - 'inside_sphere': inside_sphere, - 'blended_color_patch': blended_color_patch, - 'blended_color_patch_mask': rendering_patch_mask, - 'weights_sum_fg': weights_sum_fg - } - - def render(self, rays_o, rays_d, near, far, sdf_network, rendering_network, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio=0.0, - # * related to conditional feature - lod=None, - conditional_volume=None, - conditional_valid_mask_volume=None, - # * 2d feature maps - feature_maps=None, - color_maps=None, - w2cs=None, - intrinsics=None, - img_wh=None, - query_c2w=None, # -used for testing - if_general_rendering=True, - if_render_with_grad=True, - # * used for blending mlp rendering network - img_index=None, - rays_uv=None, - # * importance sample for second lod network - pre_sample=False, # no use here - # * for clear foreground - bg_ratio=0.0 - ): - device = rays_o.device - N_rays = len(rays_o) - # sample_dist = 2.0 / self.n_samples - sample_dist = ((far - near) / self.n_samples).mean().item() - z_vals = torch.linspace(0.0, 1.0, self.n_samples).to(device) - z_vals = near + 
(far - near) * z_vals[None, :] - - bg_num = int(self.n_samples * bg_ratio) - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - if bg_num > 0: - z_vals_bg = z_vals[:, self.n_samples - bg_num:] - z_vals = z_vals[:, :self.n_samples - bg_num] - - n_samples = self.n_samples - bg_num - perturb = self.perturb - - # - significantly speed up training, for the second lod network - if pre_sample: - z_vals = self.sample_z_vals_from_maskVolume(rays_o, rays_d, near, far, - conditional_valid_mask_volume) - - if perturb_overwrite >= 0: - perturb = perturb_overwrite - if perturb > 0: - # get intervals between samples - mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1]) - upper = torch.cat([mids, z_vals[..., -1:]], -1) - lower = torch.cat([z_vals[..., :1], mids], -1) - # stratified samples in those intervals - t_rand = torch.rand(z_vals.shape).to(device) - z_vals = lower + (upper - lower) * t_rand - - background_alpha = None - background_sampled_color = None - z_val_before = z_vals.clone() - # Up sample - if self.n_importance > 0: - with torch.no_grad(): - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] - - sdf_outputs = sdf_network.sdf( - pts.reshape(-1, 3), conditional_volume, lod=lod) - # pdb.set_trace() - sdf = sdf_outputs['sdf_pts_scale%d' % lod].reshape(N_rays, self.n_samples - bg_num) - - n_steps = 4 - for i in range(n_steps): - new_z_vals = self.up_sample(rays_o, rays_d, z_vals, sdf, self.n_importance // n_steps, - 64 * 2 ** i, - conditional_valid_mask_volume=conditional_valid_mask_volume, - ) - - # if new_z_vals.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - - z_vals, sdf = self.cat_z_vals( - rays_o, rays_d, z_vals, new_z_vals, sdf, lod, - sdf_network, gru_fusion=False, - conditional_volume=conditional_volume, - conditional_valid_mask_volume=conditional_valid_mask_volume, - ) - - del sdf - - n_samples = self.n_samples + self.n_importance - - # Background - ret_outside = None - - # Render - if bg_num > 0: - z_vals = torch.cat([z_vals, z_vals_bg], dim=1) - # if z_vals.isnan().sum() > 0: - # import ipdb; ipdb.set_trace() - ret_fine = self.render_core(rays_o, - rays_d, - z_vals, - sample_dist, - lod, - sdf_network, - rendering_network, - background_rgb=background_rgb, - background_alpha=background_alpha, - background_sampled_color=background_sampled_color, - alpha_inter_ratio=alpha_inter_ratio, - # * related to conditional feature - conditional_volume=conditional_volume, - conditional_valid_mask_volume=conditional_valid_mask_volume, - # * 2d feature maps - feature_maps=feature_maps, - color_maps=color_maps, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=img_wh, - query_c2w=query_c2w, - if_general_rendering=if_general_rendering, - if_render_with_grad=if_render_with_grad, - # * used for blending mlp rendering network - img_index=img_index, - rays_uv=rays_uv - ) - - color_fine = ret_fine['color'] - - if self.n_outside > 0: - color_fine_mask = torch.logical_or(ret_fine['color_mask'], ret_outside['color_mask']) - else: - color_fine_mask = ret_fine['color_mask'] - - weights = ret_fine['weights'] - weights_sum = ret_fine['weights_sum'] - - gradients = ret_fine['gradients'] - mid_z_vals = ret_fine['mid_z_vals'] - - # depth = (mid_z_vals * weights[:, :n_samples]).sum(dim=1, keepdim=True) - depth = ret_fine['depth'] - depth_varaince = ((mid_z_vals - depth) ** 2 * weights[:, :n_samples]).sum(dim=-1, keepdim=True) - variance = ret_fine['variance'].reshape(N_rays, n_samples).mean(dim=-1, keepdim=True) - - # - randomly sample points from the volume, and maximize the sdf - 
pts_random = torch.rand([1024, 3]).float().to(device) * 2 - 1 # normalized to (-1, 1) - sdf_random = sdf_network.sdf(pts_random, conditional_volume, lod=lod)['sdf_pts_scale%d' % lod] - - result = { - 'depth': depth, - 'color_fine': color_fine, - 'color_fine_mask': color_fine_mask, - 'color_outside': ret_outside['color'] if ret_outside is not None else None, - 'color_outside_mask': ret_outside['color_mask'] if ret_outside is not None else None, - 'color_mlp': ret_fine['color_mlp'], - 'color_mlp_mask': ret_fine['color_mlp_mask'], - 'variance': variance.mean(), - 'cdf_fine': ret_fine['cdf'], - 'depth_variance': depth_varaince, - 'weights_sum': weights_sum, - 'weights_max': torch.max(weights, dim=-1, keepdim=True)[0], - 'alpha_sum': ret_fine['alpha_sum'].mean(), - 'alpha_mean': ret_fine['alpha_mean'], - 'gradients': gradients, - 'weights': weights, - 'gradient_error_fine': ret_fine['gradient_error'], - 'inside_sphere': ret_fine['inside_sphere'], - 'sdf': ret_fine['sdf'], - 'sdf_random': sdf_random, - 'blended_color_patch': ret_fine['blended_color_patch'], - 'blended_color_patch_mask': ret_fine['blended_color_patch_mask'], - 'weights_sum_fg': ret_fine['weights_sum_fg'] - } - - return result - - @torch.no_grad() - def sample_z_vals_from_sdfVolume(self, rays_o, rays_d, near, far, sdf_volume, mask_volume): - # ? based on sdf to do importance sampling, seems that too biased on pre-estimation - device = rays_o.device - N_rays = len(rays_o) - n_samples = self.n_samples * 2 - - z_vals = torch.linspace(0.0, 1.0, n_samples).to(device) - z_vals = near + (far - near) * z_vals[None, :] - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - pts = rays_o[:, None, :] + rays_d[:, None, :] * z_vals[..., :, None] - - sdf = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), sdf_volume).reshape([N_rays, n_samples]) - - new_z_vals = self.up_sample(rays_o, rays_d, z_vals, sdf, self.n_samples, - 200, - conditional_valid_mask_volume=mask_volume, - ) - return new_z_vals - - @torch.no_grad() - def sample_z_vals_from_maskVolume(self, rays_o, rays_d, near, far, mask_volume): # don't use - device = rays_o.device - N_rays = len(rays_o) - n_samples = self.n_samples * 2 - - z_vals = torch.linspace(0.0, 1.0, n_samples).to(device) - z_vals = near + (far - near) * z_vals[None, :] - - if z_vals.shape[0] == 1: - z_vals = z_vals.repeat(N_rays, 1) - - mid_z_vals = (z_vals[:, 1:] + z_vals[:, :-1]) * 0.5 - - pts = rays_o[:, None, :] + rays_d[:, None, :] * mid_z_vals[..., :, None] - - pts_mask = self.get_pts_mask_for_conditional_volume(pts.view(-1, 3), mask_volume).reshape( - [N_rays, n_samples - 1]) - - # empty voxel set to 0.1, non-empty voxel set to 1 - weights = torch.where(pts_mask > 0, torch.ones_like(pts_mask).to(device), - 0.1 * torch.ones_like(pts_mask).to(device)) - - # sample more pts in non-empty voxels - z_samples = sample_pdf(z_vals, weights, self.n_samples, det=True).detach() - return z_samples - - @torch.no_grad() - def filter_pts_by_depthmaps(self, coords, pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums): - """ - Use the pred_depthmaps to remove redundant pts (pruned by sdf, sdf always have two sides, the back side is useless) - :param coords: [n, 3] int coords - :param pred_depth_maps: [N_views, 1, h, w] - :param proj_matrices: [N_views, 4, 4] - :param partial_vol_origin: [3] - :param voxel_size: 1 - :param near: 1 - :param far: 1 - :param depth_interval: 1 - :param d_plane_nums: 1 - :return: - """ - device = pred_depth_maps.device - 
n_views, _, sizeH, sizeW = pred_depth_maps.shape - - if len(partial_vol_origin.shape) == 1: - partial_vol_origin = partial_vol_origin[None, :] - pts = coords * voxel_size + partial_vol_origin - - rs_grid = pts.unsqueeze(0).expand(n_views, -1, -1) - rs_grid = rs_grid.permute(0, 2, 1).contiguous() # [n_views, 3, n_pts] - nV = rs_grid.shape[-1] - rs_grid = torch.cat([rs_grid, torch.ones([n_views, 1, nV]).to(device)], dim=1) # [n_views, 4, n_pts] - - # Project grid - im_p = proj_matrices @ rs_grid # - transform world pts to image UV space # [n_views, 4, n_pts] - im_x, im_y, im_z = im_p[:, 0], im_p[:, 1], im_p[:, 2] - im_x = im_x / im_z - im_y = im_y / im_z - - im_grid = torch.stack([2 * im_x / (sizeW - 1) - 1, 2 * im_y / (sizeH - 1) - 1], dim=-1) - - im_grid = im_grid.view(n_views, 1, -1, 2) - sampled_depths = torch.nn.functional.grid_sample(pred_depth_maps, im_grid, mode='bilinear', - padding_mode='zeros', - align_corners=True)[:, 0, 0, :] # [n_views, n_pts] - sampled_depths_valid = (sampled_depths > 0.5 * near).float() - valid_d_min = (sampled_depths - d_plane_nums * depth_interval).clamp(near.item(), - far.item()) * sampled_depths_valid - valid_d_max = (sampled_depths + d_plane_nums * depth_interval).clamp(near.item(), - far.item()) * sampled_depths_valid - - mask = im_grid.abs() <= 1 - mask = mask[:, 0] # [n_views, n_pts, 2] - mask = (mask.sum(dim=-1) == 2) & (im_z > valid_d_min) & (im_z < valid_d_max) - - mask = mask.view(n_views, -1) - mask = mask.permute(1, 0).contiguous() # [num_pts, nviews] - - mask_final = torch.sum(mask.float(), dim=1, keepdim=False) > 0 - - return mask_final - - @torch.no_grad() - def get_valid_sparse_coords_by_sdf_depthfilter(self, sdf_volume, coords_volume, mask_volume, feature_volume, - pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums, - threshold=0.02, maximum_pts=110000): - """ - assume batch size == 1, from the first lod to get sparse voxels - :param sdf_volume: [1, X, Y, Z] - :param coords_volume: [3, X, Y, Z] - :param mask_volume: [1, X, Y, Z] - :param feature_volume: [C, X, Y, Z] - :param threshold: - :return: - """ - device = coords_volume.device - _, dX, dY, dZ = coords_volume.shape - - def prune(sdf_pts, coords_pts, mask_volume, threshold): - occupancy_mask = (torch.abs(sdf_pts) < threshold).squeeze(1) # [num_pts] - valid_coords = coords_pts[occupancy_mask] - - # - filter backside surface by depth maps - mask_filtered = self.filter_pts_by_depthmaps(valid_coords, pred_depth_maps, proj_matrices, - partial_vol_origin, voxel_size, - near, far, depth_interval, d_plane_nums) - valid_coords = valid_coords[mask_filtered] - - # - dilate - occupancy_mask = sparse_to_dense_channel(valid_coords, 1, [dX, dY, dZ], 1, 0, device) # [dX, dY, dZ, 1] - - # - dilate - occupancy_mask = occupancy_mask.float() - occupancy_mask = occupancy_mask.view(1, 1, dX, dY, dZ) - occupancy_mask = F.avg_pool3d(occupancy_mask, kernel_size=7, stride=1, padding=3) - occupancy_mask = occupancy_mask.view(-1, 1) > 0 - - final_mask = torch.logical_and(mask_volume, occupancy_mask)[:, 0] # [num_pts] - - return final_mask, torch.sum(final_mask.float()) - - C, dX, dY, dZ = feature_volume.shape - sdf_volume = sdf_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - coords_volume = coords_volume.permute(1, 2, 3, 0).contiguous().view(-1, 3) - mask_volume = mask_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - feature_volume = feature_volume.permute(1, 2, 3, 0).contiguous().view(-1, C) - - # - for check - # sdf_volume = 
torch.rand_like(sdf_volume).float().to(sdf_volume.device) * 0.02 - - final_mask, valid_num = prune(sdf_volume, coords_volume, mask_volume, threshold) - - while (valid_num > maximum_pts) and (threshold > 0.003): - threshold = threshold - 0.002 - final_mask, valid_num = prune(sdf_volume, coords_volume, mask_volume, threshold) - - valid_coords = coords_volume[final_mask] # [N, 3] - valid_feature = feature_volume[final_mask] # [N, C] - - valid_coords = torch.cat([torch.ones([valid_coords.shape[0], 1]).to(valid_coords.device) * 0, - valid_coords], dim=1) # [N, 4], append batch idx - - # ! if the valid_num is still larger than maximum_pts, sample part of pts - if valid_num > maximum_pts: - valid_num = valid_num.long() - occupancy = torch.ones([valid_num]).to(device) > 0 - choice = np.random.choice(valid_num.cpu().numpy(), valid_num.cpu().numpy() - maximum_pts, - replace=False) - ind = torch.nonzero(occupancy).to(device) - occupancy[ind[choice]] = False - valid_coords = valid_coords[occupancy] - valid_feature = valid_feature[occupancy] - - print(threshold, "randomly sample to save memory") - - return valid_coords, valid_feature - - @torch.no_grad() - def get_valid_sparse_coords_by_sdf(self, sdf_volume, coords_volume, mask_volume, feature_volume, threshold=0.02, - maximum_pts=110000): - """ - assume batch size == 1, from the first lod to get sparse voxels - :param sdf_volume: [num_pts, 1] - :param coords_volume: [3, X, Y, Z] - :param mask_volume: [1, X, Y, Z] - :param feature_volume: [C, X, Y, Z] - :param threshold: - :return: - """ - - def prune(sdf_volume, mask_volume, threshold): - occupancy_mask = torch.abs(sdf_volume) < threshold # [num_pts, 1] - - # - dilate - occupancy_mask = occupancy_mask.float() - occupancy_mask = occupancy_mask.view(1, 1, dX, dY, dZ) - occupancy_mask = F.avg_pool3d(occupancy_mask, kernel_size=7, stride=1, padding=3) - occupancy_mask = occupancy_mask.view(-1, 1) > 0 - - final_mask = torch.logical_and(mask_volume, occupancy_mask)[:, 0] # [num_pts] - - return final_mask, torch.sum(final_mask.float()) - - C, dX, dY, dZ = feature_volume.shape - coords_volume = coords_volume.permute(1, 2, 3, 0).contiguous().view(-1, 3) - mask_volume = mask_volume.permute(1, 2, 3, 0).contiguous().view(-1, 1) - feature_volume = feature_volume.permute(1, 2, 3, 0).contiguous().view(-1, C) - - final_mask, valid_num = prune(sdf_volume, mask_volume, threshold) - - while (valid_num > maximum_pts) and (threshold > 0.003): - threshold = threshold - 0.002 - final_mask, valid_num = prune(sdf_volume, mask_volume, threshold) - - valid_coords = coords_volume[final_mask] # [N, 3] - valid_feature = feature_volume[final_mask] # [N, C] - - valid_coords = torch.cat([torch.ones([valid_coords.shape[0], 1]).to(valid_coords.device) * 0, - valid_coords], dim=1) # [N, 4], append batch idx - - # ! 
if the valid_num is still larger than maximum_pts, sample part of pts - if valid_num > maximum_pts: - device = sdf_volume.device - valid_num = valid_num.long() - occupancy = torch.ones([valid_num]).to(device) > 0 - choice = np.random.choice(valid_num.cpu().numpy(), valid_num.cpu().numpy() - maximum_pts, - replace=False) - ind = torch.nonzero(occupancy).to(device) - occupancy[ind[choice]] = False - valid_coords = valid_coords[occupancy] - valid_feature = valid_feature[occupancy] - - print(threshold, "randomly sample to save memory") - - return valid_coords, valid_feature - - @torch.no_grad() - def extract_fields(self, bound_min, bound_max, resolution, query_func, device, - # * related to conditional feature - **kwargs - ): - N = 64 - X = torch.linspace(bound_min[0], bound_max[0], resolution).split(N) - Y = torch.linspace(bound_min[1], bound_max[1], resolution).split(N) - Z = torch.linspace(bound_min[2], bound_max[2], resolution).split(N) - - u = np.zeros([resolution, resolution, resolution], dtype=np.float32) - with torch.no_grad(): - for xi, xs in enumerate(X): - for yi, ys in enumerate(Y): - for zi, zs in enumerate(Z): - xx, yy, zz = torch.meshgrid(xs, ys, zs) - pts = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1), zz.reshape(-1, 1)], dim=-1).to(device) - - # ! attention, the query function is different for extract geometry and fields - output = query_func(pts, **kwargs) - sdf = output['sdf_pts_scale%d' % kwargs['lod']].reshape(len(xs), len(ys), - len(zs)).detach().cpu().numpy() - - u[xi * N: xi * N + len(xs), yi * N: yi * N + len(ys), zi * N: zi * N + len(zs)] = -1 * sdf - return u - - @torch.no_grad() - def extract_geometry(self, sdf_network, bound_min, bound_max, resolution, threshold, device, occupancy_mask=None, - # * 3d feature volume - **kwargs - ): - # logging.info('threshold: {}'.format(threshold)) - - u = self.extract_fields(bound_min, bound_max, resolution, - lambda pts, **kwargs: sdf_network.sdf(pts, **kwargs), - # - sdf need to be multiplied by -1 - device, - # * 3d feature volume - **kwargs - ) - if occupancy_mask is not None: - dX, dY, dZ = occupancy_mask.shape - empty_mask = 1 - occupancy_mask - empty_mask = empty_mask.view(1, 1, dX, dY, dZ) - # - dilation - # empty_mask = F.avg_pool3d(empty_mask, kernel_size=7, stride=1, padding=3) - empty_mask = F.interpolate(empty_mask, [resolution, resolution, resolution], mode='nearest') - empty_mask = empty_mask.view(resolution, resolution, resolution).cpu().numpy() > 0 - u[empty_mask] = -100 - del empty_mask - - vertices, triangles = mcubes.marching_cubes(u, threshold) - b_max_np = bound_max.detach().cpu().numpy() - b_min_np = bound_min.detach().cpu().numpy() - - vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :] - return vertices, triangles, u - - @torch.no_grad() - def extract_depth_maps(self, sdf_network, con_volume, intrinsics, c2ws, H, W, near, far): - """ - extract depth maps from the density volume - :param con_volume: [1, 1+C, dX, dY, dZ] can by con_volume or sdf_volume - :param c2ws: [B, 4, 4] - :param H: - :param W: - :param near: - :param far: - :return: - """ - device = con_volume.device - batch_size = intrinsics.shape[0] - - with torch.no_grad(): - ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H), - torch.linspace(0, W - 1, W)) # pytorch's meshgrid has indexing='ij' - p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1) # H, W, 3 - - intrinsics_inv = torch.inverse(intrinsics) - - p = p.view(-1, 3).float().to(device) # N_rays, 3 - p = torch.matmul(intrinsics_inv[:, None, 
:3, :3], p[:, :, None]).squeeze() # Batch, N_rays, 3 - rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True) # Batch, N_rays, 3 - rays_v = torch.matmul(c2ws[:, None, :3, :3], rays_v[:, :, :, None]).squeeze() # Batch, N_rays, 3 - rays_o = c2ws[:, None, :3, 3].expand(rays_v.shape) # Batch, N_rays, 3 - rays_d = rays_v - - rays_o = rays_o.contiguous().view(-1, 3) - rays_d = rays_d.contiguous().view(-1, 3) - - ################## - sphere tracer to extract depth maps ###################### - depth_masks_sphere, depth_maps_sphere = self.ray_tracer.extract_depth_maps( - rays_o, rays_d, - near[None, :].repeat(rays_o.shape[0], 1), - far[None, :].repeat(rays_o.shape[0], 1), - sdf_network, con_volume - ) - - depth_maps = depth_maps_sphere.view(batch_size, 1, H, W) - depth_masks = depth_masks_sphere.view(batch_size, 1, H, W) - - depth_maps = torch.where(depth_masks, depth_maps, - torch.zeros_like(depth_masks.float()).to(device)) # fill invalid pixels by 0 - - return depth_maps, depth_masks diff --git a/SparseNeuS_demo_v1/models/trainer_finetune.py b/SparseNeuS_demo_v1/models/trainer_finetune.py deleted file mode 100644 index e6203976b2a72dea61e1e728a3b1a225366f56a2..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/trainer_finetune.py +++ /dev/null @@ -1,979 +0,0 @@ -""" -Trainer for fine-tuning -""" -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F - -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic -from models.render_utils import sample_pdf -from utils.misc_utils import visualize_depth_numpy - -from utils.training_utils import tocuda, numpy2tensor -from loss.depth_metric import compute_depth_errors -from loss.color_loss import OcclusionColorLoss, OcclusionColorPatchLoss -from loss.depth_loss import DepthLoss, DepthSmoothLoss - -from models.projector import Projector - -from models.rays import gen_rays_between - -from models.sparse_neus_renderer import SparseNeuSRenderer - -import pdb - - -class FinetuneTrainer(nn.Module): - """ - Trainer used for fine-tuning - """ - - def __init__(self, - rendering_network_outside, - pyramid_feature_network_lod0, - pyramid_feature_network_lod1, - sdf_network_lod0, - sdf_network_lod1, - variance_network_lod0, - variance_network_lod1, - sdf_network_finetune, - finetune_lod, # which lod fine-tuning use - n_samples, - n_importance, - n_outside, - perturb, - alpha_type='div', - conf=None - ): - super(FinetuneTrainer, self).__init__() - - self.conf = conf - self.base_exp_dir = conf['general.base_exp_dir'] - - self.finetune_lod = finetune_lod - - self.anneal_start = self.conf.get_float('train.anneal_start', default=0.0) - self.anneal_end = self.conf.get_float('train.anneal_end', default=0.0) - self.end_iter = self.conf.get_int('train.end_iter') - - # network setups - self.rendering_network_outside = rendering_network_outside - self.pyramid_feature_network_geometry_lod0 = pyramid_feature_network_lod0 # 2D pyramid feature network for geometry - self.pyramid_feature_network_geometry_lod1 = pyramid_feature_network_lod1 # use differnet networks for the two lods - - self.sdf_network_lod0 = sdf_network_lod0 # the first lod is density_network - self.sdf_network_lod1 = sdf_network_lod1 - - # - warpped by ModuleList to support DataParallel - self.variance_network_lod0 = variance_network_lod0 - self.variance_network_lod1 = variance_network_lod1 - self.variance_network_finetune = variance_network_lod0 if self.finetune_lod == 0 else variance_network_lod1 - - 
self.sdf_network_finetune = sdf_network_finetune - - self.n_samples = n_samples - self.n_importance = n_importance - self.n_outside = n_outside - self.perturb = perturb - self.alpha_type = alpha_type - - self.sdf_renderer_finetune = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_finetune, - self.variance_network_finetune, - None, # rendering_network - self.n_samples, - self.n_importance, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - # sdf network weights - self.sdf_igr_weight = self.conf.get_float('train.sdf_igr_weight') - self.sdf_sparse_weight = self.conf.get_float('train.sdf_sparse_weight', default=0) - - self.sdf_decay_param = self.conf.get_float('train.sdf_decay_param', default=100) - self.color_pixel_weight = self.conf.get_float('train.color_pixel_weight', default=1.0) - self.color_patch_weight = self.conf.get_float('train.color_patch_weight', default=0.) - self.tv_weight = self.conf.get_float('train.tv_weight', default=0.001) # no use - self.visibility_beta = self.conf.get_float('train.visibility_beta', default=0.025) - self.visibility_gama = self.conf.get_float('train.visibility_gama', default=0.015) - self.visibility_penalize_ratio = self.conf.get_float('train.visibility_penalize_ratio', default=0.8) - self.visibility_weight_thred = self.conf.get_list('train.visibility_weight_thred', default=[0.7]) - self.if_visibility_aware = self.conf.get_bool('train.if_visibility_aware', default=True) - self.train_from_scratch = self.conf.get_bool('train.train_from_scratch', default=False) - - self.depth_criterion = DepthLoss() - self.depth_smooth_criterion = DepthSmoothLoss() - self.occlusion_color_criterion = OcclusionColorLoss(beta=self.visibility_beta, - gama=self.visibility_gama, - weight_thred=self.visibility_weight_thred, - occlusion_aware=self.if_visibility_aware) - self.occlusion_color_patch_criterion = OcclusionColorPatchLoss( - type=self.conf.get_string('train.patch_loss_type', default='ncc'), - h_patch_size=self.conf.get_int('model.h_patch_size', default=5), - beta=self.visibility_beta, gama=self.visibility_gama, - weight_thred=self.visibility_weight_thred, - occlusion_aware=self.if_visibility_aware - ) - - # self.iter_step = 0 - self.val_mesh_freq = self.conf.get_int('train.val_mesh_freq') - - # - True if fine-tuning - self.if_fitted_rendering = self.conf.get_bool('train.if_fitted_rendering', default=False) - - def get_trainable_params(self): - # set trainable params - - params = [] - faster_params = [] - slower_params = [] - - params += self.variance_network_finetune.parameters() - slower_params += self.sdf_network_finetune.sparse_volume_lod0.parameters() - params += self.sdf_network_finetune.sdf_layer.parameters() - - faster_params += self.sdf_network_finetune.renderer.parameters() - - self.params_to_train = { - 'slower_params': slower_params, - 'params': params, - 'faster_params': faster_params - } - - return self.params_to_train - - @torch.no_grad() - def prepare_con_volume(self, sample): - # * only support batch_size==1 - sizeW = sample['img_wh'][0] - sizeH = sample['img_wh'][1] - partial_vol_origin = sample['partial_vol_origin'][None, :] # [B, 3] - near, far = sample['near_fars'][0, :1], sample['near_fars'][0, 1:] - near = 0.8 * near - far = 1.2 * far - - imgs = sample['images'] - intrinsics = sample['intrinsics'] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'] - c2ws = sample['c2ws'] - proj_matrices = sample['affine_mats'][None, :, :, :] - - # *********************** 
Lod==0 *********************** - - with torch.no_grad(): - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs) - # import ipdb; ipdb.set_trace() - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.finetune_lod == 0: - return con_volume_lod0, con_valid_mask_volume_lod0, coords_lod0 - - # * extract depth maps for all the images for adaptive rendering_network - depth_maps_lod0, depth_masks_lod0 = None, None - if self.finetune_lod == 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - if self.finetune_lod == 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - pre_coords, pre_feats = self.sdf_renderer_finetune.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - maximum_pts=200000) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - coords_lod1 = conditional_features_lod1['coords_scale1'] # [1,3,wX,wY,wZ] - con_valid_mask_volume_lod0 = F.interpolate(con_valid_mask_volume_lod0, scale_factor=2) - - return con_volume_lod1, con_valid_mask_volume_lod1, coords_lod1 - - def initialize_finetune_network(self, sample, sparse_con_volume=None, sparse_coords_volume=None, - train_from_scratch=False): - - if not train_from_scratch: - if sparse_con_volume is None: # if the - - con_volume, con_mask_volume, _ = self.prepare_con_volume(sample) - - device = con_volume.device - - self.sdf_network_finetune.initialize_conditional_volumes( - con_volume, - con_mask_volume - ) - else: - self.sdf_network_finetune.initialize_conditional_volumes( - None, - None, - sparse_con_volume, - sparse_coords_volume - ) - else: - device = sample['images'].device - vol_dims = self.sdf_network_finetune.vol_dims - con_volume = torch.zeros( - [1, self.sdf_network_finetune.regnet_d_out, vol_dims[0], vol_dims[1], vol_dims[2]]).to(device) - con_mask_volume = torch.ones([1, 1, vol_dims[0], vol_dims[1], vol_dims[2]]).to(device) - self.sdf_network_finetune.initialize_conditional_volumes( - con_volume, - con_mask_volume - ) - - self.sdf_network_lod0, self.sdf_network_lod1 = None, None - self.pyramid_feature_network_geometry_lod0, self.pyramid_feature_network_geometry_lod1 = None, None - - def train_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - - # * finetune on one specific scene - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - img_index = sample['img_index'][0] # [n] - - # the full-size ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - w2cs = sample['w2cs'][0] - proj_matrices = sample['affine_mats'] - scale_mat = sample['scale_mat'] - trans_mat = sample['trans_mat'] - - query_c2w = sample['query_c2w'] - - # *********************** Lod==0 *********************** - - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - # # - extract mesh - if iter_step % self.val_mesh_freq == 0: - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, - lod=0, - threshold=0., - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='ft', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - render_out = self.sdf_renderer_finetune.render( - rays_o, rays_d, near, far, - self.sdf_network_finetune, - None, # rendering_network - background_rgb=background_rgb, - alpha_inter_ratio=1.0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - img_index=img_index, - rays_uv=rays_ndc_uv if self.color_patch_weight > 0 else None, - ) - - # * optional TV regularizer, we don't use in this paper - if self.tv_weight > 0: - tv = self.sdf_network_finetune.tv_regularizer() - else: - tv = 0.0 - render_out['tv'] = tv - loss_lod0, losses_lod0, depth_statis_lod0 = self.cal_losses_sdf(render_out, sample_rays, iter_step) - - losses = { - # - lod 0 - 'loss_lod0': loss_lod0, - 'losses_lod0': losses_lod0, - 'depth_statis_lod0': depth_statis_lod0, - } - - return losses - - def val_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=True, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - img_index = sample['img_index'][0] # [n] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_mlp = [] - - if save_vis: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # ****** lod 0 **** - render_out = self.sdf_renderer_finetune.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_finetune, - None, - background_rgb=background_rgb, - alpha_inter_ratio=1., - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - if_render_with_grad=False, - img_index=img_index, - # rays_uv=rays_ndc_uv - ) - - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - - if feasible('color_mlp'): - out_rgb_mlp.append(render_out['color_mlp'].detach().cpu().numpy()) - - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - 
out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - # - save visualization of lod 0 - - self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - query_w2c[0], out_rgb_fine, H, W, - depth_min, depth_max, iter_step, meta, "val_lod0", - out_color_mlp=out_rgb_mlp, true_depth=true_depth) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - def export_mesh_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=512, - save_vis=True, - ): - # * only support batch_size==1 - # ! attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - # meta = sample['meta'][batch_idx] # the scan lighting ref_view info - meta='' - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # import ipdb; ipdb.set_trace() - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - - # - extract mesh - - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_finetune, - self.sdf_renderer_finetune.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - def save_visualization(self, true_img, true_colored_depth, out_depth, out_normal, w2cs, out_color, H, W, - depth_min, depth_max, iter_step, meta, comment, out_color_mlp=[], true_depth=None): - if len(out_color) > 0: - img_fine = (np.concatenate(out_color, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_color_mlp) > 0: - img_mlp = (np.concatenate(out_color_mlp, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_normal) > 0: - normal_img = np.concatenate(out_normal, axis=0) - rot = w2cs[:3, :3].detach().cpu().numpy() - # - convert normal from world space to camera space - normal_img = (np.matmul(rot[None, :, :], - normal_img[:, :, None]).reshape([H, W, 3]) * 128 + 128).clip(0, 255) - if len(out_depth) > 0: - pred_depth = np.concatenate(out_depth, 
axis=0).reshape([H, W]) - pred_depth_colored = visualize_depth_numpy(pred_depth, [depth_min, depth_max])[0] - - if len(out_depth) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'depths_' + comment), exist_ok=True) - if true_colored_depth is not None: - - if true_depth is not None: - depth_error_map = np.abs(true_depth - pred_depth) * 5.0 - depth_visualized = np.concatenate( - [depth_error_map, true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] - else: - depth_visualized = np.concatenate( - [true_colored_depth, pred_depth_colored, true_img])[:, :, ::-1] - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), depth_visualized - ) - else: - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [pred_depth_colored, true_img])[:, :, ::-1]) - if len(out_color) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_fine, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_color_mlp) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_mlp, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_normal) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'normals_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'normals_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - normal_img[:, :, ::-1]) - - def forward(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - mode='train', - save_vis=False, - ): - - if mode == 'train': - return self.train_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, - ) - elif mode == 'val': - return self.val_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, save_vis=save_vis, - ) - elif mode == 'export_mesh': - return self.export_mesh_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - iter_step=iter_step, save_vis=save_vis, - ) - - def obtain_pyramid_feature_maps(self, imgs, lod=0): - """ - get feature maps of all conditional images - :param imgs: - :return: - """ - - if lod == 0: - extractor = self.pyramid_feature_network_geometry_lod0 - elif lod >= 1: - extractor = self.pyramid_feature_network_geometry_lod1 - - pyramid_feature_maps = extractor(imgs) - - # * the pyramid features are very important, if only use the coarst features, hard to optimize - fused_feature_maps = torch.cat([ - F.interpolate(pyramid_feature_maps[0], scale_factor=4, mode='bilinear', align_corners=True), - F.interpolate(pyramid_feature_maps[1], scale_factor=2, mode='bilinear', align_corners=True), - pyramid_feature_maps[2] - ], dim=1) - - return fused_feature_maps - - def cal_losses_sdf(self, render_out, sample_rays, iter_step=-1): - - def get_weight(iter_step, weight): - if iter_step < 0: - return weight - - if self.anneal_end == 0.0: - return weight - elif iter_step < self.anneal_start: - return 0.0 - else: - return np.min( - [1.0, - (iter_step - self.anneal_start) / (self.anneal_end * 2 - self.anneal_start)]) * weight - - rays_o = sample_rays['rays_o'][0] - 
rays_d = sample_rays['rays_v'][0] - true_rgb = sample_rays['rays_color'][0] - - if 'rays_depth' in sample_rays.keys(): - true_depth = sample_rays['rays_depth'][0] - else: - true_depth = None - mask = sample_rays['rays_mask'][0] - - color_fine = render_out['color_fine'] - color_fine_mask = render_out['color_fine_mask'] - depth_pred = render_out['depth'] - - variance = render_out['variance'] - cdf_fine = render_out['cdf_fine'] - weight_sum = render_out['weights_sum'] - - if self.train_from_scratch: - occlusion_aware = False if iter_step < 5000 else True - else: - occlusion_aware = True - - gradient_error_fine = render_out['gradient_error_fine'] - - sdf = render_out['sdf'] - - # * color generated by mlp - color_mlp = render_out['color_mlp'] - color_mlp_mask = render_out['color_mlp_mask'] - - if color_mlp is not None: - # Color loss - color_mlp_mask = color_mlp_mask[..., 0] - - color_mlp_loss, color_mlp_error = self.occlusion_color_criterion(pred=color_mlp, gt=true_rgb, - weight=weight_sum.squeeze(), - mask=color_mlp_mask, - occlusion_aware=occlusion_aware) - - psnr_mlp = 20.0 * torch.log10( - 1.0 / (((color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_mlp_loss = 0. - psnr_mlp = 0. - - # - blended patch loss - blended_color_patch = render_out['blended_color_patch'] # [N_pts, Npx, 3] - blended_color_patch_mask = render_out['blended_color_patch_mask'] # [N_pts, 1] - color_patch_loss = 0.0 - color_patch_error = 0.0 - visibility_beta = 0.0 - if blended_color_patch is not None: - rays_patch_color = sample_rays['rays_patch_color'][0] - rays_patch_mask = sample_rays['rays_patch_mask'][0] - patch_mask = (rays_patch_mask * blended_color_patch_mask).float()[:, 0] > 0 # [N_pts] - - color_patch_loss, color_patch_error, visibility_beta = self.occlusion_color_patch_criterion( - blended_color_patch, - rays_patch_color, - weight=weight_sum.squeeze(), - mask=patch_mask, - penalize_ratio=self.visibility_penalize_ratio, - occlusion_aware=occlusion_aware - ) - - if true_depth is not None: - depth_loss = self.depth_criterion(depth_pred, true_depth, mask) - - # depth evaluation - depth_statis = compute_depth_errors(depth_pred.detach().cpu().numpy(), true_depth.cpu().numpy(), - mask.cpu().numpy() > 0) - depth_statis = numpy2tensor(depth_statis, device=rays_o.device) - else: - depth_loss = 0. - depth_statis = None - - # - if without sparse_loss, the mean sdf is 0.02. 
- # - use sparse_loss to prevent occluded pts have 0 sdf - sparse_loss_1 = torch.exp(-1 * torch.abs(render_out['sdf_random']) * self.sdf_decay_param * 10).mean() - sparse_loss_2 = torch.exp(-1 * torch.abs(sdf) * self.sdf_decay_param).mean() - sparse_loss = (sparse_loss_1 + sparse_loss_2) / 2 - - sdf_mean = torch.abs(sdf).mean() - sparseness_1 = (torch.abs(sdf) < 0.01).to(torch.float32).mean() - sparseness_2 = (torch.abs(sdf) < 0.02).to(torch.float32).mean() - - # Eikonal loss - gradient_error_loss = gradient_error_fine - - # * optional TV regularizer - if 'tv' in render_out.keys(): - tv = render_out['tv'] - else: - tv = 0.0 - - loss = color_mlp_loss + \ - color_patch_loss * self.color_patch_weight + \ - sparse_loss * get_weight(iter_step, self.sdf_sparse_weight) + \ - gradient_error_loss * self.sdf_igr_weight - - losses = { - "loss": loss, - "depth_loss": depth_loss, - "color_mlp_loss": color_mlp_error, - "gradient_error_loss": gradient_error_loss, - "sparse_loss": sparse_loss, - "sparseness_1": sparseness_1, - "sparseness_2": sparseness_2, - "sdf_mean": sdf_mean, - "psnr_mlp": psnr_mlp, - "weights_sum": render_out['weights_sum'], - "alpha_sum": render_out['alpha_sum'], - "variance": render_out['variance'], - "sparse_weight": get_weight(iter_step, self.sdf_sparse_weight), - 'color_patch_loss': color_patch_error, - 'visibility_beta': visibility_beta, - 'tv': tv, - } - - losses = numpy2tensor(losses, device=rays_o.device) - - return loss, losses, depth_statis - - def validate_mesh(self, sdf_network, func_extract_geometry, world_space=True, resolution=256, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - # occupancy_mask=occupancy_mask - ) - - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - - mesh = trimesh.Trimesh(vertices, triangles) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) - - def gen_video(self, sample, - perturb_overwrite=-1, - background_rgb=None, - iter_step=0, - chunk_size=1024, - ): - # * only support batch_size==1 - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] * 0.8 - - img_index = sample['img_index'][0] # [n] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = 
sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - rendering_c2ws = sample['rendering_c2ws'][0] # [n, 4, 4] - rendering_imgs_idx = sample['rendering_imgs_idx'][0] - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - # - obtain conditional features - with torch.no_grad(): - # - lod 0 - conditional_features_lod0 = self.sdf_network_finetune.get_conditional_volume() - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - inter_views_num = 60 - resolution_level = 2 - for r_idx in range(rendering_c2ws.shape[0] - 1): - for idx in range(inter_views_num): - query_c2w, rays_o, rays_d = gen_rays_between( - rendering_c2ws[r_idx], rendering_c2ws[r_idx + 1], intrinsics[0], - np.sin(((idx / 60.0) - 0.5) * np.pi) * 0.5 + 0.5, - H, W, resolution_level=resolution_level) - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - # ****** lod 0 **** - render_out = self.sdf_renderer_finetune.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_finetune, - None, - background_rgb=background_rgb, - alpha_inter_ratio=1., - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=None, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_general_rendering=False, - if_render_with_grad=False, - img_index=img_index, - # rays_uv=rays_ndc_uv - ) - # pdb.set_trace() - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_mlp'): - out_rgb_fine.append(render_out['color_mlp'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples + self.n_importance, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - img_fine = (np.concatenate(out_rgb_fine, axis=0).reshape( - [H // resolution_level, W // 
resolution_level, 3, -1]) * 256).clip(0, 255) - save_dir = os.path.join(self.base_exp_dir, 'render_{}_{}'.format(rendering_imgs_idx[r_idx], - rendering_imgs_idx[r_idx + 1])) - os.makedirs(save_dir, exist_ok=True) - # ic(img_fine.shape) - print(cv.imwrite( - os.path.join(save_dir, '{}.png'.format(idx + r_idx * inter_views_num)), - img_fine.squeeze()[:, :, ::-1])) - print(os.path.join(save_dir, '{}.png'.format(idx + r_idx * inter_views_num))) diff --git a/SparseNeuS_demo_v1/models/trainer_generic.py b/SparseNeuS_demo_v1/models/trainer_generic.py index 5c87d61d5c7feb93dadd40099a5ebe0a9db81924..786ccfd0f84f45ec395db8831b78cecbda803139 100644 --- a/SparseNeuS_demo_v1/models/trainer_generic.py +++ b/SparseNeuS_demo_v1/models/trainer_generic.py @@ -8,26 +8,17 @@ import torch.nn as nn import torch.nn.functional as F import numpy as np -import logging -import mcubes import trimesh from icecream import ic from utils.misc_utils import visualize_depth_numpy -from utils.training_utils import numpy2tensor from loss.depth_metric import compute_depth_errors from loss.depth_loss import DepthLoss, DepthSmoothLoss -from models.rays import gen_rays_between - from models.sparse_neus_renderer import SparseNeuSRenderer -def safe_l2_normalize(x, dim=None, eps=1e-6): - return F.normalize(x, p=2, dim=dim, eps=eps) - - class GenericTrainer(nn.Module): def __init__(self, rendering_network_outside, @@ -223,7 +214,6 @@ class GenericTrainer(nn.Module): con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # import ipdb; ipdb.set_trace() coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] # * extract depth maps for all the images @@ -347,8 +337,6 @@ class GenericTrainer(nn.Module): mode='train_bg', meta=meta, iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - # import ipdb; ipdb.set_trace() - # print("Checker3.1:, after val mesh") losses = { # - lod 0 'loss_lod0': loss_lod0, @@ -427,7 +415,6 @@ class GenericTrainer(nn.Module): with torch.no_grad(): # - obtain conditional features geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # import ipdb; ipdb.set_trace() # - lod 0 conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( feature_maps=geometry_feature_maps[None, :, :, :, :], @@ -836,7 +823,6 @@ class GenericTrainer(nn.Module): print("meta: ", meta) print("scale_factor: ", scale_factor) print("depth_error_mean: ", depth_error_map.mean()) - # import ipdb; ipdb.set_trace() depth_visualized = np.concatenate( [(depth_error_map * 255).astype(np.uint8), true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] # print("depth_visualized.shape: ", depth_visualized.shape) @@ -1003,12 +989,10 @@ class GenericTrainer(nn.Module): if color_fine is not None: # Color loss color_mask = color_fine_mask if color_fine_mask is not None else mask - # import ipdb; ipdb.set_trace() color_mask = color_mask[..., 0] color_error = (color_fine[color_mask] - true_rgb[color_mask]) # print("Nan number", torch.isnan(color_error).sum()) # print("Color error shape", color_error.shape) - # import ipdb; ipdb.set_trace() color_fine_loss = F.l1_loss(color_error, torch.zeros_like(color_error).to(color_error.device), reduction='mean') # print(color_fine_loss) @@ -1100,8 +1084,7 @@ class GenericTrainer(nn.Module): "fg_bg_weight": fg_bg_weight, "fg_bg_loss": fg_bg_loss, # added by jha, bug of sparseNeuS } - # print("[TEST]: weights_sum in trainner forward", 
losses['weights_sum'].mean()) - losses = numpy2tensor(losses, device=rays_o.device) + losses = torch.tensor(losses, device=rays_o.device) return loss, losses, depth_statis @torch.no_grad() @@ -1213,7 +1196,7 @@ class GenericTrainer(nn.Module): trans_mat_np = trans_mat.cpu().numpy() vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - # import ipdb; ipdb.set_trace() + vertices_color = np.array(vertices_color.squeeze(0).cpu() * 255, dtype=np.uint8) mesh = trimesh.Trimesh(vertices, triangles, vertex_colors=vertices_color) os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod)), exist_ok=True) diff --git a/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py b/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py deleted file mode 100644 index 8a75f2c7fcaf613e1a4c5deeb9a8be15abd96d8d..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/models/trainer_generic_normals_new.py +++ /dev/null @@ -1,1313 +0,0 @@ -""" -decouple the trainer with the renderer -""" -import os -import cv2 as cv -import torch -import torch.nn as nn -import torch.nn.functional as F - -import numpy as np -import logging -import mcubes -import trimesh -from icecream import ic - -from utils.misc_utils import visualize_depth_numpy - -from utils.training_utils import numpy2tensor -from loss.depth_metric import compute_depth_errors - -from loss.depth_loss import DepthLoss, DepthSmoothLoss - -from models.rays import gen_rays_between - -from models.sparse_neus_renderer_normals_new import SparseNeuSRenderer - -def safe_l2_normalize(x, dim=None, eps=1e-6): - return F.normalize(x, p=2, dim=dim, eps=eps) - - -class GenericTrainer(nn.Module): - def __init__(self, - rendering_network_outside, - pyramid_feature_network_lod0, - pyramid_feature_network_lod1, - sdf_network_lod0, - sdf_network_lod1, - variance_network_lod0, - variance_network_lod1, - rendering_network_lod0, - rendering_network_lod1, - n_samples_lod0, - n_importance_lod0, - n_samples_lod1, - n_importance_lod1, - n_outside, - perturb, - alpha_type='div', - conf=None, - timestamp="", - mode='train', - base_exp_dir=None, - ): - super(GenericTrainer, self).__init__() - - self.conf = conf - self.timestamp = timestamp - - - self.base_exp_dir = base_exp_dir - - - self.anneal_start = self.conf.get_float('train.anneal_start', default=0.0) - self.anneal_end = self.conf.get_float('train.anneal_end', default=0.0) - self.anneal_start_lod1 = self.conf.get_float('train.anneal_start_lod1', default=0.0) - self.anneal_end_lod1 = self.conf.get_float('train.anneal_end_lod1', default=0.0) - - # network setups - self.rendering_network_outside = rendering_network_outside - self.pyramid_feature_network_geometry_lod0 = pyramid_feature_network_lod0 # 2D pyramid feature network for geometry - self.pyramid_feature_network_geometry_lod1 = pyramid_feature_network_lod1 # use differnet networks for the two lods - - # when num_lods==2, may consume too much memeory - self.sdf_network_lod0 = sdf_network_lod0 - self.sdf_network_lod1 = sdf_network_lod1 - - # - warpped by ModuleList to support DataParallel - self.variance_network_lod0 = variance_network_lod0 - self.variance_network_lod1 = variance_network_lod1 - - self.rendering_network_lod0 = rendering_network_lod0 - self.rendering_network_lod1 = rendering_network_lod1 - - self.n_samples_lod0 = n_samples_lod0 - self.n_importance_lod0 = n_importance_lod0 - self.n_samples_lod1 = n_samples_lod1 - 
self.n_importance_lod1 = n_importance_lod1 - self.n_outside = n_outside - self.num_lods = conf.get_int('model.num_lods') # the number of octree lods - self.perturb = perturb - self.alpha_type = alpha_type - - # - the two renderers - self.sdf_renderer_lod0 = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_lod0, - self.variance_network_lod0, - self.rendering_network_lod0, - self.n_samples_lod0, - self.n_importance_lod0, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - self.sdf_renderer_lod1 = SparseNeuSRenderer( - self.rendering_network_outside, - self.sdf_network_lod1, - self.variance_network_lod1, - self.rendering_network_lod1, - self.n_samples_lod1, - self.n_importance_lod1, - self.n_outside, - self.perturb, - alpha_type='div', - conf=self.conf) - - self.if_fix_lod0_networks = self.conf.get_bool('train.if_fix_lod0_networks') - - # sdf network weights - self.sdf_igr_weight = self.conf.get_float('train.sdf_igr_weight') - self.sdf_sparse_weight = self.conf.get_float('train.sdf_sparse_weight', default=0) - self.sdf_decay_param = self.conf.get_float('train.sdf_decay_param', default=100) - self.fg_bg_weight = self.conf.get_float('train.fg_bg_weight', default=0.00) - self.bg_ratio = self.conf.get_float('train.bg_ratio', default=0.0) - - self.depth_criterion = DepthLoss() - - # - DataParallel mode, cannot modify attributes in forward() - # self.iter_step = 0 - self.val_mesh_freq = self.conf.get_int('train.val_mesh_freq') - - # - True for finetuning; False for general training - self.if_fitted_rendering = self.conf.get_bool('train.if_fitted_rendering', default=False) - - self.prune_depth_filter = self.conf.get_bool('model.prune_depth_filter', default=False) - - def get_trainable_params(self): - # set trainable params - - self.params_to_train = [] - - if not self.if_fix_lod0_networks: - # load pretrained featurenet - self.params_to_train += list(self.pyramid_feature_network_geometry_lod0.parameters()) - self.params_to_train += list(self.sdf_network_lod0.parameters()) - self.params_to_train += list(self.variance_network_lod0.parameters()) - - if self.rendering_network_lod0 is not None: - self.params_to_train += list(self.rendering_network_lod0.parameters()) - - if self.sdf_network_lod1 is not None: - # load pretrained featurenet - self.params_to_train += list(self.pyramid_feature_network_geometry_lod1.parameters()) - - self.params_to_train += list(self.sdf_network_lod1.parameters()) - self.params_to_train += list(self.variance_network_lod1.parameters()) - if self.rendering_network_lod1 is not None: - self.params_to_train += list(self.rendering_network_lod1.parameters()) - - return self.params_to_train - - def train_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['near_fars'][0, 0, :1], sample['near_fars'][0, 0, 1:] - - # the full-size ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - scale_mat = sample['scale_mat'] - trans_mat = sample['trans_mat'] - - # *********************** Lod==0 *********************** - if not self.if_fix_lod0_networks: - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs) - - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - else: - with torch.no_grad(): - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - # print("Checker2:, construct cost volume") - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - # import ipdb; ipdb.set_trace() - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - # * extract depth maps for all the images - depth_maps_lod0, depth_masks_lod0 = None, None - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - if self.prune_depth_filter: - depth_maps_lod0_l4x, depth_masks_lod0_l4x = self.sdf_renderer_lod0.extract_depth_maps( - self.sdf_network_lod0, sdf_volume_lod0, intrinsics_l_4x, c2ws, - sizeH // 4, sizeW // 4, near * 1.5, far) - depth_maps_lod0 = F.interpolate(depth_maps_lod0_l4x, size=(sizeH, sizeW), mode='bilinear', - align_corners=True) - depth_masks_lod0 = F.interpolate(depth_masks_lod0_l4x.float(), size=(sizeH, sizeW), mode='nearest') - - # *************** losses - loss_lod0, losses_lod0, depth_statis_lod0 = None, None, None - - if not self.if_fix_lod0_networks: - - render_out = self.sdf_renderer_lod0.render( - rays_o, rays_d, near, far, - self.sdf_network_lod0, - self.rendering_network_lod0, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - if_general_rendering=True, - if_render_with_grad=True, - ) - - loss_lod0, losses_lod0, depth_statis_lod0 = self.cal_losses_sdf(render_out, sample_rays, - iter_step, lod=0) 
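As an aside on the pattern above: the deleted trainer extracts per-view depth at quarter resolution (the intrinsics are scaled by 0.25 to match the H/4 x W/4 images) and then upsamples the result back to full size, bilinearly for depth and with nearest-neighbour for the binary masks. A minimal, self-contained sketch of that pattern follows; the tensor sizes and the scale_intrinsics helper are illustrative, not part of the repository.

import torch
import torch.nn.functional as F

def scale_intrinsics(intrinsics, factor):
    # fx, 0, cx and 0, fy, cy sit in the first two rows of each K matrix,
    # so scaling those rows matches a resize of the image by `factor`
    scaled = intrinsics.clone()
    scaled[:, :2] *= factor
    return scaled

intrinsics = torch.eye(3)[None].repeat(2, 1, 1)       # dummy [N, 3, 3] intrinsics
intrinsics_l_4x = scale_intrinsics(intrinsics, 0.25)  # for images downsampled to H/4 x W/4

depth_low = torch.rand(2, 1, 64, 64)                  # depth predicted at quarter resolution
mask_low = (depth_low > 0.5).float()
depth_full = F.interpolate(depth_low, size=(256, 256), mode='bilinear', align_corners=True)
mask_full = F.interpolate(mask_low, size=(256, 256), mode='nearest')  # nearest keeps masks binary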
- - # *********************** Lod==1 *********************** - - loss_lod1, losses_lod1, depth_statis_lod1 = None, None, None - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - # geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - # ? It seems that training gru_fusion, this part should be trainable too - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, 1:, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices[:,1:], - # proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - # if not self.if_gru_fusion_lod1: - render_out_lod1 = self.sdf_renderer_lod1.render( - rays_o, rays_d, near, far, - self.sdf_network_lod1, - self.rendering_network_lod1, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod1, - # * related to conditional feature - lod=1, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # * 2d feature maps - feature_maps=geometry_feature_maps_lod1, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - bg_ratio=self.bg_ratio, - ) - loss_lod1, losses_lod1, depth_statis_lod1 = self.cal_losses_sdf(render_out_lod1, sample_rays, - iter_step, lod=1) - - # print("Checker3:, compute losses") - # # - extract mesh - if iter_step % self.val_mesh_freq == 0: - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_lod0, - self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - # occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='train_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, - trans_mat=trans_mat) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_mesh(self.sdf_network_lod1, - self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, lod=1, - # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - mode='train_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, - trans_mat=trans_mat) - # import ipdb; ipdb.set_trace() - # print("Checker3.1:, after val mesh") - losses = { - # - lod 0 - 'loss_lod0': loss_lod0, - 'losses_lod0': losses_lod0, - 'depth_statis_lod0': depth_statis_lod0, - - # - lod 1 - 'loss_lod1': loss_lod1, - 'losses_lod1': losses_lod1, - 'depth_statis_lod1': depth_statis_lod1, - - } - - return losses - - def val_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - proj_matrices = sample['affine_mats'] - - # render_img_idx = sample['render_img_idx'][0] - # true_img = sample['images'][0][render_img_idx] - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - scale_factor = sample['scale_factor'][0].cpu().numpy() - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - obtain conditional features - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # import ipdb; ipdb.set_trace() - # - lod 0 - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - depth_maps_lod0, depth_masks_lod0 = None, None - if self.prune_depth_filter: - depth_maps_lod0_l4x, depth_masks_lod0_l4x = self.sdf_renderer_lod0.extract_depth_maps( - self.sdf_network_lod0, sdf_volume_lod0, - intrinsics_l_4x, c2ws, - sizeH // 4, sizeW // 4, near * 1.5, far) # - near*1.5 is a experienced number - depth_maps_lod0 = F.interpolate(depth_maps_lod0_l4x, size=(sizeH, sizeW), mode='bilinear', - align_corners=True) - depth_masks_lod0 = F.interpolate(depth_masks_lod0_l4x.float(), size=(sizeH, sizeW), mode='nearest') - - #### visualize the depth_maps_lod0 for checking - colored_depth_maps_lod0 = [] - for i in range(depth_maps_lod0.shape[0]): - colored_depth_maps_lod0.append( - visualize_depth_numpy(depth_maps_lod0[i, 0].cpu().numpy(), [depth_min, depth_max])[0]) - - colored_depth_maps_lod0 = np.concatenate(colored_depth_maps_lod0, axis=0).astype(np.uint8) - os.makedirs(os.path.join(self.base_exp_dir, 
'depth_maps_lod0'), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'depth_maps_lod0', - '{:0>8d}_{}.png'.format(iter_step, meta)), - colored_depth_maps_lod0[:, :, ::-1]) - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - with torch.no_grad(): - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_fine_lod1 = [] - out_normal_fine_lod1 = [] - out_depth_fine_lod1 = [] - - # out_depth_fine_explicit = [] - if save_vis: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # ****** lod 0 **** - render_out = self.sdf_renderer_lod0.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_lod0, - self.rendering_network_lod0, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod0, - # * related to conditional feature - lod=0, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # * 2d feature maps - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_render_with_grad=False, - ) - - feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - if feasible('depth'): - out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out['inside_sphere'] is not None: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples_lod0 + self.n_importance_lod0, - None] * render_out['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - :self.n_samples_lod0 + self.n_importance_lod0, - None]).sum(dim=1).detach().cpu().numpy()) - del render_out - - # ****************** lod 1 ************************** - if self.num_lods > 1: - for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - render_out_lod1 = self.sdf_renderer_lod1.render( - rays_o_batch, rays_d_batch, near, far, - self.sdf_network_lod1, - self.rendering_network_lod1, - background_rgb=background_rgb, - alpha_inter_ratio=alpha_inter_ratio_lod1, - # * related to conditional feature - lod=1, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # * 2d feature maps - 
feature_maps=geometry_feature_maps_lod1, - color_maps=imgs, - w2cs=w2cs, - intrinsics=intrinsics, - img_wh=[sizeW, sizeH], - query_c2w=query_c2w, - if_render_with_grad=False, - ) - - feasible = lambda key: ((key in render_out_lod1) and (render_out_lod1[key] is not None)) - - if feasible('depth'): - out_depth_fine_lod1.append(render_out_lod1['depth'].detach().cpu().numpy()) - - # if render_out['color_coarse'] is not None: - if feasible('color_fine'): - out_rgb_fine_lod1.append(render_out_lod1['color_fine'].detach().cpu().numpy()) - if feasible('gradients') and feasible('weights'): - if render_out_lod1['inside_sphere'] is not None: - out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - :self.n_samples_lod1 + self.n_importance_lod1, - None] * - render_out_lod1['inside_sphere'][ - ..., None]).sum(dim=1).detach().cpu().numpy()) - else: - out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - :self.n_samples_lod1 + self.n_importance_lod1, - None]).sum( - dim=1).detach().cpu().numpy()) - del render_out_lod1 - - # - save visualization of lod 0 - - self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - query_w2c[0], out_rgb_fine, H, W, - depth_min, depth_max, iter_step, meta, "val_lod0", true_depth=true_depth, scale_factor=scale_factor) - - if self.num_lods > 1: - self.save_visualization(true_img, true_depth_colored, out_depth_fine_lod1, out_normal_fine_lod1, - query_w2c[0], out_rgb_fine_lod1, H, W, - depth_min, depth_max, iter_step, meta, "val_lod1", true_depth=true_depth, scale_factor=scale_factor) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_mesh(self.sdf_network_lod0, - self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, lod=0, - threshold=0, - # occupancy_mask=con_valid_mask_volume_lod0[0, 0], - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_mesh(self.sdf_network_lod1, - self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, lod=1, - # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - torch.cuda.empty_cache() - - - - def export_mesh_step(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - chunk_size=512, - save_vis=False, - ): - # * only support batch_size==1 - # ! 
attention: the list of string cannot be splited in DataParallel - batch_idx = sample['batch_idx'][0] - meta = sample['meta'][batch_idx] # the scan lighting ref_view info - - sizeW = sample['img_wh'][0][0] - sizeH = sample['img_wh'][0][1] - H, W = sizeH, sizeW - - partial_vol_origin = sample['partial_vol_origin'] # [B, 3] - near, far = sample['query_near_far'][0, :1], sample['query_near_far'][0, 1:] - - # the ray variables - sample_rays = sample['rays'] - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - rays_ndc_uv = sample_rays['rays_ndc_uv'][0] - - imgs = sample['images'][0] - intrinsics = sample['intrinsics'][0] - intrinsics_l_4x = intrinsics.clone() - intrinsics_l_4x[:, :2] *= 0.25 - w2cs = sample['w2cs'][0] - c2ws = sample['c2ws'][0] - # target_candidate_w2cs = sample['target_candidate_w2cs'][0] - proj_matrices = sample['affine_mats'] - - - # - the image to render - scale_mat = sample['scale_mat'] # [1,4,4] used to convert mesh into true scale - trans_mat = sample['trans_mat'] - query_c2w = sample['query_c2w'] # [1,4,4] - query_w2c = sample['query_w2c'] # [1,4,4] - true_img = sample['query_image'][0] - true_img = np.uint8(true_img.permute(1, 2, 0).cpu().numpy() * 255) - - depth_min, depth_max = near.cpu().numpy(), far.cpu().numpy() - - scale_factor = sample['scale_factor'][0].cpu().numpy() - true_depth = sample['query_depth'] if 'query_depth' in sample.keys() else None - if true_depth is not None: - true_depth = true_depth[0].cpu().numpy() - true_depth_colored = visualize_depth_numpy(true_depth, [depth_min, depth_max])[0] - else: - true_depth_colored = None - - rays_o = rays_o.reshape(-1, 3).split(chunk_size) - rays_d = rays_d.reshape(-1, 3).split(chunk_size) - - # - obtain conditional features - with torch.no_grad(): - # - obtain conditional features - geometry_feature_maps = self.obtain_pyramid_feature_maps(imgs, lod=0) - # - lod 0 - conditional_features_lod0 = self.sdf_network_lod0.get_conditional_volume( - feature_maps=geometry_feature_maps[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, - lod=0, - ) - - con_volume_lod0 = conditional_features_lod0['dense_volume_scale0'] - con_valid_mask_volume_lod0 = conditional_features_lod0['valid_mask_volume_scale0'] - coords_lod0 = conditional_features_lod0['coords_scale0'] # [1,3,wX,wY,wZ] - - if self.num_lods > 1: - sdf_volume_lod0 = self.sdf_network_lod0.get_sdf_volume( - con_volume_lod0, con_valid_mask_volume_lod0, - coords_lod0, partial_vol_origin) # [1, 1, dX, dY, dZ] - - depth_maps_lod0, depth_masks_lod0 = None, None - - - if self.num_lods > 1: - geometry_feature_maps_lod1 = self.obtain_pyramid_feature_maps(imgs, lod=1) - - if self.prune_depth_filter: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf_depthfilter( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0], - depth_maps_lod0, proj_matrices[0], - partial_vol_origin, self.sdf_network_lod0.voxel_size, - near, far, self.sdf_network_lod0.voxel_size, 12) - else: - pre_coords, pre_feats = self.sdf_renderer_lod0.get_valid_sparse_coords_by_sdf( - sdf_volume_lod0[0], coords_lod0[0], con_valid_mask_volume_lod0[0], con_volume_lod0[0]) - - pre_coords[:, 1:] = pre_coords[:, 1:] * 2 - - with torch.no_grad(): - conditional_features_lod1 = self.sdf_network_lod1.get_conditional_volume( - feature_maps=geometry_feature_maps_lod1[None, :, :, :, :], - partial_vol_origin=partial_vol_origin, - proj_mats=proj_matrices, - sizeH=sizeH, - sizeW=sizeW, 
- pre_coords=pre_coords, - pre_feats=pre_feats, - ) - - con_volume_lod1 = conditional_features_lod1['dense_volume_scale1'] - con_valid_mask_volume_lod1 = conditional_features_lod1['valid_mask_volume_scale1'] - - out_rgb_fine = [] - out_normal_fine = [] - out_depth_fine = [] - - out_rgb_fine_lod1 = [] - out_normal_fine_lod1 = [] - out_depth_fine_lod1 = [] - - # # out_depth_fine_explicit = [] - # if save_vis: - # for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - - # # ****** lod 0 **** - # render_out = self.sdf_renderer_lod0.render( - # rays_o_batch, rays_d_batch, near, far, - # self.sdf_network_lod0, - # self.rendering_network_lod0, - # background_rgb=background_rgb, - # alpha_inter_ratio=alpha_inter_ratio_lod0, - # # * related to conditional feature - # lod=0, - # conditional_volume=con_volume_lod0, - # conditional_valid_mask_volume=con_valid_mask_volume_lod0, - # # * 2d feature maps - # feature_maps=geometry_feature_maps, - # color_maps=imgs, - # w2cs=w2cs, - # intrinsics=intrinsics, - # img_wh=[sizeW, sizeH], - # query_c2w=query_c2w, - # if_render_with_grad=False, - # ) - - # feasible = lambda key: ((key in render_out) and (render_out[key] is not None)) - - # if feasible('depth'): - # out_depth_fine.append(render_out['depth'].detach().cpu().numpy()) - - # # if render_out['color_coarse'] is not None: - # if feasible('color_fine'): - # out_rgb_fine.append(render_out['color_fine'].detach().cpu().numpy()) - # if feasible('gradients') and feasible('weights'): - # if render_out['inside_sphere'] is not None: - # out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - # :self.n_samples_lod0 + self.n_importance_lod0, - # None] * render_out['inside_sphere'][ - # ..., None]).sum(dim=1).detach().cpu().numpy()) - # else: - # out_normal_fine.append((render_out['gradients'] * render_out['weights'][:, - # :self.n_samples_lod0 + self.n_importance_lod0, - # None]).sum(dim=1).detach().cpu().numpy()) - # del render_out - - # # ****************** lod 1 ************************** - # if self.num_lods > 1: - # for rays_o_batch, rays_d_batch in zip(rays_o, rays_d): - # render_out_lod1 = self.sdf_renderer_lod1.render( - # rays_o_batch, rays_d_batch, near, far, - # self.sdf_network_lod1, - # self.rendering_network_lod1, - # background_rgb=background_rgb, - # alpha_inter_ratio=alpha_inter_ratio_lod1, - # # * related to conditional feature - # lod=1, - # conditional_volume=con_volume_lod1, - # conditional_valid_mask_volume=con_valid_mask_volume_lod1, - # # * 2d feature maps - # feature_maps=geometry_feature_maps_lod1, - # color_maps=imgs, - # w2cs=w2cs, - # intrinsics=intrinsics, - # img_wh=[sizeW, sizeH], - # query_c2w=query_c2w, - # if_render_with_grad=False, - # ) - - # feasible = lambda key: ((key in render_out_lod1) and (render_out_lod1[key] is not None)) - - # if feasible('depth'): - # out_depth_fine_lod1.append(render_out_lod1['depth'].detach().cpu().numpy()) - - # # if render_out['color_coarse'] is not None: - # if feasible('color_fine'): - # out_rgb_fine_lod1.append(render_out_lod1['color_fine'].detach().cpu().numpy()) - # if feasible('gradients') and feasible('weights'): - # if render_out_lod1['inside_sphere'] is not None: - # out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - # :self.n_samples_lod1 + self.n_importance_lod1, - # None] * - # render_out_lod1['inside_sphere'][ - # ..., None]).sum(dim=1).detach().cpu().numpy()) - # else: - # out_normal_fine_lod1.append((render_out_lod1['gradients'] * render_out_lod1['weights'][:, - # 
:self.n_samples_lod1 + self.n_importance_lod1, - # None]).sum( - # dim=1).detach().cpu().numpy()) - # del render_out_lod1 - - # # - save visualization of lod 0 - - # self.save_visualization(true_img, true_depth_colored, out_depth_fine, out_normal_fine, - # query_w2c[0], out_rgb_fine, H, W, - # depth_min, depth_max, iter_step, meta, "val_lod0", true_depth=true_depth, scale_factor=scale_factor) - - # if self.num_lods > 1: - # self.save_visualization(true_img, true_depth_colored, out_depth_fine_lod1, out_normal_fine_lod1, - # query_w2c[0], out_rgb_fine_lod1, H, W, - # depth_min, depth_max, iter_step, meta, "val_lod1", true_depth=true_depth, scale_factor=scale_factor) - - # - extract mesh - if (iter_step % self.val_mesh_freq == 0): - torch.cuda.empty_cache() - self.validate_colored_mesh( - density_or_sdf_network=self.sdf_network_lod0, - func_extract_geometry=self.sdf_renderer_lod0.extract_geometry, - conditional_volume=con_volume_lod0, - conditional_valid_mask_volume = con_valid_mask_volume_lod0, - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - target_candidate_w2cs=None, - intrinsics=intrinsics, - rendering_network=self.rendering_network_lod0, - rendering_projector=self.sdf_renderer_lod0.rendering_projector, - lod=0, - threshold=0, - query_c2w=query_c2w, - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat - ) - torch.cuda.empty_cache() - - if self.num_lods > 1: - self.validate_colored_mesh( - density_or_sdf_network=self.sdf_network_lod1, - func_extract_geometry=self.sdf_renderer_lod1.extract_geometry, - conditional_volume=con_volume_lod1, - conditional_valid_mask_volume = con_valid_mask_volume_lod1, - feature_maps=geometry_feature_maps, - color_maps=imgs, - w2cs=w2cs, - target_candidate_w2cs=None, - intrinsics=intrinsics, - rendering_network=self.rendering_network_lod1, - rendering_projector=self.sdf_renderer_lod1.rendering_projector, - lod=1, - threshold=0, - query_c2w=query_c2w, - mode='val_bg', meta=meta, - iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat - ) - torch.cuda.empty_cache() - # self.validate_mesh(self.sdf_network_lod1, - # self.sdf_renderer_lod1.extract_geometry, - # conditional_volume=con_volume_lod1, lod=1, - # # occupancy_mask=con_valid_mask_volume_lod1[0, 0].detach(), - # mode='val_bg', meta=meta, - # iter_step=iter_step, scale_mat=scale_mat, trans_mat=trans_mat) - - # torch.cuda.empty_cache() - - - def save_visualization(self, true_img, true_colored_depth, out_depth, out_normal, w2cs, out_color, H, W, - depth_min, depth_max, iter_step, meta, comment, out_color_mlp=[], true_depth=None, scale_factor=1.0): - if len(out_color) > 0: - img_fine = (np.concatenate(out_color, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_color_mlp) > 0: - img_mlp = (np.concatenate(out_color_mlp, axis=0).reshape([H, W, 3]) * 256).clip(0, 255) - - if len(out_normal) > 0: - normal_img = np.concatenate(out_normal, axis=0) - rot = w2cs[:3, :3].detach().cpu().numpy() - # - convert normal from world space to camera space - normal_img = (np.matmul(rot[None, :, :], - normal_img[:, :, None]).reshape([H, W, 3]) * 128 + 128).clip(0, 255) - if len(out_depth) > 0: - pred_depth = np.concatenate(out_depth, axis=0).reshape([H, W]) - pred_depth_colored = visualize_depth_numpy(pred_depth, [depth_min, depth_max])[0] - - if len(out_depth) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'depths_' + comment), exist_ok=True) - if true_colored_depth is not None: - - if true_depth is not None: - depth_error_map = np.abs(true_depth 
- pred_depth) * 2.0 / scale_factor - # [256, 256, 1] -> [256, 256, 3] - depth_error_map = np.tile(depth_error_map[:, :, None], [1, 1, 3]) - print("meta: ", meta) - print("scale_factor: ", scale_factor) - print("depth_error_mean: ", depth_error_map.mean()) - # import ipdb; ipdb.set_trace() - depth_visualized = np.concatenate( - [(depth_error_map * 255).astype(np.uint8), true_colored_depth, pred_depth_colored, true_img], axis=1)[:, :, ::-1] - # print("depth_visualized.shape: ", depth_visualized.shape) - # write depth error result text on img, the input is a numpy array of [256, 1024, 3] - # cv.putText(depth_visualized.copy(), "depth_error_mean: {:.4f}".format(depth_error_map.mean()), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) - else: - depth_visualized = np.concatenate( - [true_colored_depth, pred_depth_colored, true_img])[:, :, ::-1] - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), depth_visualized - ) - else: - cv.imwrite( - os.path.join(self.base_exp_dir, 'depths_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [pred_depth_colored, true_img])[:, :, ::-1]) - if len(out_color) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_fine, true_img])[:, :, ::-1]) # bgr2rgb - # compute psnr (image pixel lie in [0, 255]) - mse_loss = np.mean((img_fine - true_img) ** 2) - psnr = 10 * np.log10(255 ** 2 / mse_loss) - - print("PSNR: ", psnr) - - if len(out_color_mlp) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'synthesized_color_mlp_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - np.concatenate( - [img_mlp, true_img])[:, :, ::-1]) # bgr2rgb - - if len(out_normal) > 0: - os.makedirs(os.path.join(self.base_exp_dir, 'normals_' + comment), exist_ok=True) - cv.imwrite(os.path.join(self.base_exp_dir, 'normals_' + comment, - '{:0>8d}_{}.png'.format(iter_step, meta)), - normal_img[:, :, ::-1]) - - def forward(self, sample, - perturb_overwrite=-1, - background_rgb=None, - alpha_inter_ratio_lod0=0.0, - alpha_inter_ratio_lod1=0.0, - iter_step=0, - mode='train', - save_vis=False, - ): - - if mode == 'train': - return self.train_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step - ) - elif mode == 'val': - import time - begin = time.time() - result = self.val_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step, - save_vis=save_vis, - ) - end = time.time() - print("val_step time: ", end - begin) - return result - elif mode == 'export_mesh': - import time - begin = time.time() - result = self.export_mesh_step(sample, - perturb_overwrite=perturb_overwrite, - background_rgb=background_rgb, - alpha_inter_ratio_lod0=alpha_inter_ratio_lod0, - alpha_inter_ratio_lod1=alpha_inter_ratio_lod1, - iter_step=iter_step, - save_vis=save_vis, - ) - end = time.time() - print("export mesh time: ", end - begin) - return result - def obtain_pyramid_feature_maps(self, imgs, lod=0): - """ - get feature maps of all conditional 
images - :param imgs: - :return: - """ - - if lod == 0: - extractor = self.pyramid_feature_network_geometry_lod0 - elif lod >= 1: - extractor = self.pyramid_feature_network_geometry_lod1 - - pyramid_feature_maps = extractor(imgs) - - # * the pyramid features are very important, if only use the coarst features, hard to optimize - fused_feature_maps = torch.cat([ - F.interpolate(pyramid_feature_maps[0], scale_factor=4, mode='bilinear', align_corners=True), - F.interpolate(pyramid_feature_maps[1], scale_factor=2, mode='bilinear', align_corners=True), - pyramid_feature_maps[2] - ], dim=1) - - return fused_feature_maps - - def cal_losses_sdf(self, render_out, sample_rays, iter_step=-1, lod=0): - - # loss weight schedule; the regularization terms should be added in later training stage - def get_weight(iter_step, weight): - if lod == 1: - anneal_start = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = anneal_end * 2 - else: - anneal_start = self.anneal_start if lod == 0 else self.anneal_start_lod1 - anneal_end = self.anneal_end if lod == 0 else self.anneal_end_lod1 - anneal_end = anneal_end * 2 - - if iter_step < 0: - return weight - - if anneal_end == 0.0: - return weight - elif iter_step < anneal_start: - return 0.0 - else: - return np.min( - [1.0, - (iter_step - anneal_start) / (anneal_end - anneal_start)]) * weight - - rays_o = sample_rays['rays_o'][0] - rays_d = sample_rays['rays_v'][0] - true_rgb = sample_rays['rays_color'][0] - - if 'rays_depth' in sample_rays.keys(): - true_depth = sample_rays['rays_depth'][0] - else: - true_depth = None - mask = sample_rays['rays_mask'][0] - - color_fine = render_out['color_fine'] - color_fine_mask = render_out['color_fine_mask'] - depth_pred = render_out['depth'] - - variance = render_out['variance'] - cdf_fine = render_out['cdf_fine'] - weight_sum = render_out['weights_sum'] - - gradient_error_fine = render_out['gradient_error_fine'] - - sdf = render_out['sdf'] - - # * color generated by mlp - color_mlp = render_out['color_mlp'] - color_mlp_mask = render_out['color_mlp_mask'] - - if color_fine is not None: - # Color loss - color_mask = color_fine_mask if color_fine_mask is not None else mask - # import ipdb; ipdb.set_trace() - color_mask = color_mask[..., 0] - color_error = (color_fine[color_mask] - true_rgb[color_mask]) - # print("Nan number", torch.isnan(color_error).sum()) - # print("Color error shape", color_error.shape) - # import ipdb; ipdb.set_trace() - color_fine_loss = F.l1_loss(color_error, torch.zeros_like(color_error).to(color_error.device), - reduction='mean') - # print(color_fine_loss) - psnr = 20.0 * torch.log10( - 1.0 / (((color_fine[color_mask] - true_rgb[color_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_fine_loss = 0. - psnr = 0. - - if color_mlp is not None: - # Color loss - color_mlp_mask = color_mlp_mask[..., 0] - color_error_mlp = (color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) - color_mlp_loss = F.l1_loss(color_error_mlp, - torch.zeros_like(color_error_mlp).to(color_error_mlp.device), - reduction='mean') - - psnr_mlp = 20.0 * torch.log10( - 1.0 / (((color_mlp[color_mlp_mask] - true_rgb[color_mlp_mask]) ** 2).mean() / (3.0)).sqrt()) - else: - color_mlp_loss = 0. - psnr_mlp = 0. 
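The get_weight schedule above gates each regularization term with a linear warm-up over the annealing window. A standalone sketch of that schedule; the function name and the numbers in the loop are illustrative, not the config values.

import numpy as np

def annealed_weight(iter_step, weight, anneal_start, anneal_end):
    # off before anneal_start, linear ramp up to anneal_end, constant afterwards
    if anneal_end == 0.0:
        return weight          # annealing disabled
    if iter_step < anneal_start:
        return 0.0
    ramp = (iter_step - anneal_start) / (anneal_end - anneal_start)
    return float(np.clip(ramp, 0.0, 1.0)) * weight

for step in (0, 5000, 12500, 25000, 50000):
    print(step, annealed_weight(step, weight=0.02, anneal_start=0, anneal_end=25000))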
- - # depth loss is only used for inference, not included in total loss - if true_depth is not None: - # depth_loss = self.depth_criterion(depth_pred, true_depth, mask) - depth_loss = self.depth_criterion(depth_pred, true_depth) - - # # depth evaluation - # depth_statis = compute_depth_errors(depth_pred.detach().cpu().numpy(), true_depth.cpu().numpy()) - # depth_statis = numpy2tensor(depth_statis, device=rays_o.device) - depth_statis = None - else: - depth_loss = 0. - depth_statis = None - - sparse_loss_1 = torch.exp( - -1 * torch.abs(render_out['sdf_random']) * self.sdf_decay_param).mean() # - should equal - sparse_loss_2 = torch.exp(-1 * torch.abs(sdf) * self.sdf_decay_param).mean() - sparse_loss = (sparse_loss_1 + sparse_loss_2) / 2 - - sdf_mean = torch.abs(sdf).mean() - sparseness_1 = (torch.abs(sdf) < 0.01).to(torch.float32).mean() - sparseness_2 = (torch.abs(sdf) < 0.02).to(torch.float32).mean() - - # Eikonal loss - gradient_error_loss = gradient_error_fine - - # ! the first 50k, don't use bg constraint - fg_bg_weight = 0.0 if iter_step < 50000 else get_weight(iter_step, self.fg_bg_weight) - - # Mask loss, optional - # The images of DTU dataset contain large black regions (0 rgb values), - # can use this data prior to make fg more clean - background_loss = 0.0 - fg_bg_loss = 0.0 - if self.fg_bg_weight > 0 and torch.mean((mask < 0.5).to(torch.float32)) > 0.02: - weights_sum_fg = render_out['weights_sum_fg'] - fg_bg_error = (weights_sum_fg - mask)[mask < 0.5] - fg_bg_loss = F.l1_loss(fg_bg_error, - torch.zeros_like(fg_bg_error).to(fg_bg_error.device), - reduction='mean') - - - - loss = 1.0 * depth_loss + color_fine_loss + color_mlp_loss + \ - sparse_loss * get_weight(iter_step, self.sdf_sparse_weight) + \ - fg_bg_loss * fg_bg_weight + \ - gradient_error_loss * self.sdf_igr_weight # ! 
gradient_error_loss need a mask - - losses = { - "loss": loss, - "depth_loss": depth_loss, - "color_fine_loss": color_fine_loss, - "color_mlp_loss": color_mlp_loss, - "gradient_error_loss": gradient_error_loss, - "background_loss": background_loss, - "sparse_loss": sparse_loss, - "sparseness_1": sparseness_1, - "sparseness_2": sparseness_2, - "sdf_mean": sdf_mean, - "psnr": psnr, - "psnr_mlp": psnr_mlp, - "weights_sum": render_out['weights_sum'], - "weights_sum_fg": render_out['weights_sum_fg'], - "alpha_sum": render_out['alpha_sum'], - "variance": render_out['variance'], - "sparse_weight": get_weight(iter_step, self.sdf_sparse_weight), - "fg_bg_weight": fg_bg_weight, - "fg_bg_loss": fg_bg_loss, # added by jha, bug of sparseNeuS - } - # print("[TEST]: weights_sum in trainner forward", losses['weights_sum'].mean()) - losses = numpy2tensor(losses, device=rays_o.device) - return loss, losses, depth_statis - - @torch.no_grad() - def validate_mesh(self, density_or_sdf_network, func_extract_geometry, world_space=True, resolution=360, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - density_or_sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - occupancy_mask=occupancy_mask - ) - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: # w2c_ref_inv - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - - mesh = trimesh.Trimesh(vertices, triangles) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) - - - - def validate_colored_mesh(self, density_or_sdf_network, func_extract_geometry, world_space=True, resolution=360, - threshold=0.0, mode='val', - # * 3d feature volume - conditional_volume=None, - conditional_valid_mask_volume=None, - feature_maps=None, - color_maps = None, - w2cs=None, - target_candidate_w2cs=None, - intrinsics=None, - rendering_network=None, - rendering_projector=None, - query_c2w=None, - lod=None, occupancy_mask=None, - bound_min=[-1, -1, -1], bound_max=[1, 1, 1], meta='', iter_step=0, scale_mat=None, - trans_mat=None - ): - - bound_min = torch.tensor(bound_min, dtype=torch.float32) - bound_max = torch.tensor(bound_max, dtype=torch.float32) - - vertices, triangles, fields = func_extract_geometry( - density_or_sdf_network, - bound_min, bound_max, resolution=resolution, - threshold=threshold, device=conditional_volume.device, - # * 3d feature volume - conditional_volume=conditional_volume, lod=lod, - occupancy_mask=occupancy_mask - ) - - - with torch.no_grad(): - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask, _, _ = rendering_projector.compute_view_independent( - torch.tensor(vertices).to(conditional_volume), - lod=0, - # * 3d geometry feature volumes - 
geometryVolume=conditional_volume[0], - geometryVolumeMask=conditional_valid_mask_volume[0], - sdf_network=density_or_sdf_network, - # * 2d rendering feature maps - rendering_feature_maps=feature_maps, # [n_view, 56, 256, 256] - color_maps=color_maps, - w2cs=w2cs, - target_candidate_w2cs=target_candidate_w2cs, - intrinsics=intrinsics, - img_wh=[256,256], - query_img_idx=0, # the index of the N_views dim for rendering - query_c2w=query_c2w, - ) - - - vertices_color, rendering_valid_mask = rendering_network( - ren_geo_feats, ren_rgb_feats, ren_ray_diff, ren_mask) - - - - if scale_mat is not None: - scale_mat_np = scale_mat.cpu().numpy() - vertices = vertices * scale_mat_np[0][0, 0] + scale_mat_np[0][:3, 3][None] - - if trans_mat is not None: # w2c_ref_inv - trans_mat_np = trans_mat.cpu().numpy() - vertices_homo = np.concatenate([vertices, np.ones_like(vertices[:, :1])], axis=1) - vertices = np.matmul(trans_mat_np, vertices_homo[:, :, None])[:, :3, 0] - # import ipdb; ipdb.set_trace() - vertices_color = np.array(vertices_color.squeeze(0).cpu() * 255, dtype=np.uint8) - mesh = trimesh.Trimesh(vertices, triangles, vertex_colors=vertices_color) - os.makedirs(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod)), exist_ok=True) - mesh.export(os.path.join(self.base_exp_dir, 'meshes_' + mode, 'lod{:0>1d}'.format(lod), - 'mesh_{:0>8d}_{}_lod{:0>1d}.ply'.format(iter_step, meta, lod))) \ No newline at end of file diff --git a/SparseNeuS_demo_v1/ops/generate_grids.py b/SparseNeuS_demo_v1/ops/generate_grids.py index 884c37793131323c566c6d1a738f06d497bbd2fb..304c1c4c1a424c4bc219f39815ed43fea1d9de5d 100644 --- a/SparseNeuS_demo_v1/ops/generate_grids.py +++ b/SparseNeuS_demo_v1/ops/generate_grids.py @@ -12,7 +12,7 @@ def generate_grid(n_vox, interval): with torch.no_grad(): # Create voxel grid grid_range = [torch.arange(0, n_vox[axis], interval) for axis in range(3)] - grid = torch.stack(torch.meshgrid(grid_range[0], grid_range[1], grid_range[2])) # 3 dx dy dz + grid = torch.stack(torch.meshgrid(grid_range[0], grid_range[1], grid_range[2], indexing="ij")) # 3 dx dy dz # ! 
don't create tensor on gpu; imbalanced gpu memory in ddp mode grid = grid.unsqueeze(0).type(torch.float32) # 1 3 dx dy dz diff --git a/SparseNeuS_demo_v1/utils/training_utils.py b/SparseNeuS_demo_v1/utils/training_utils.py deleted file mode 100644 index 5d128ba2beda39b708850bd4c17c4603a8a17848..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/utils/training_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -import numpy as np -import torchvision.utils as vutils -import torch, random -import torch.nn.functional as F - - -# print arguments -def print_args(args): - print("################################ args ################################") - for k, v in args.__dict__.items(): - print("{0: <10}\t{1: <30}\t{2: <20}".format(k, str(v), str(type(v)))) - print("########################################################################") - - -# torch.no_grad warpper for functions -def make_nograd_func(func): - def wrapper(*f_args, **f_kwargs): - with torch.no_grad(): - ret = func(*f_args, **f_kwargs) - return ret - - return wrapper - - -# convert a function into recursive style to handle nested dict/list/tuple variables -def make_recursive_func(func): - def wrapper(vars, device=None): - if isinstance(vars, list): - return [wrapper(x, device) for x in vars] - elif isinstance(vars, tuple): - return tuple([wrapper(x, device) for x in vars]) - elif isinstance(vars, dict): - return {k: wrapper(v, device) for k, v in vars.items()} - else: - return func(vars, device) - - return wrapper - - -@make_recursive_func -def tensor2float(vars): - if isinstance(vars, float): - return vars - elif isinstance(vars, torch.Tensor): - return vars.data.item() - else: - raise NotImplementedError("invalid input type {} for tensor2float".format(type(vars))) - - -@make_recursive_func -def tensor2numpy(vars): - if isinstance(vars, np.ndarray): - return vars - elif isinstance(vars, torch.Tensor): - return vars.detach().cpu().numpy().copy() - else: - raise NotImplementedError("invalid input type {} for tensor2numpy".format(type(vars))) - - -@make_recursive_func -def numpy2tensor(vars, device='cpu'): - if not isinstance(vars, torch.Tensor) and vars is not None : - return torch.tensor(vars, device=device) - elif isinstance(vars, torch.Tensor): - return vars - elif vars is None: - return vars - else: - raise NotImplementedError("invalid input type {} for float2tensor".format(type(vars))) - - -@make_recursive_func -def tocuda(vars, device='cuda'): - if isinstance(vars, torch.Tensor): - return vars.to(device) - elif isinstance(vars, str): - return vars - else: - raise NotImplementedError("invalid input type {} for tocuda".format(type(vars))) - - -import torch.distributed as dist - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - dist.barrier() - - -def get_world_size(): - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def reduce_scalar_outputs(scalar_outputs): - world_size = get_world_size() - if world_size < 2: - return scalar_outputs - with torch.no_grad(): - names = [] - scalars = [] - for k in sorted(scalar_outputs.keys()): - names.append(k) - if isinstance(scalar_outputs[k], torch.Tensor): - scalars.append(scalar_outputs[k]) - else: - scalars.append(torch.tensor(scalar_outputs[k], device='cuda')) - scalars = 
torch.stack(scalars, dim=0) - dist.reduce(scalars, dst=0) - if dist.get_rank() == 0: - # only main process gets accumulated, so only divide by - # world_size in this case - scalars /= world_size - reduced_scalars = {k: v for k, v in zip(names, scalars)} - - return reduced_scalars diff --git a/SparseNeuS_demo_v1/weights/ckpt.pth b/SparseNeuS_demo_v1/weights/ckpt.pth deleted file mode 100644 index ea22ffa970c253e2f1d6cccbe195f703027264f6..0000000000000000000000000000000000000000 --- a/SparseNeuS_demo_v1/weights/ckpt.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee9a0027096b3f4f304e2801ebe41545241f974f7d812dc802ac70c8aeeab2b2 -size 6859767 diff --git a/sam_utils.py b/sam_utils.py index 78e56f4ef6645c336fba911b2d7cc7523cfca0d7..fe6371910204a4b1826261c2eed450bfdb9244cf 100644 --- a/sam_utils.py +++ b/sam_utils.py @@ -7,16 +7,12 @@ import time from segment_anything import sam_model_registry, SamPredictor def sam_init(device_id=0): - import inspect - dir_path = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) - sam_checkpoint = os.path.join(dir_path, "sam_vit_h_4b8939.pth") + sam_checkpoint = os.path.join(os.path.dirname(__file__), "sam_vit_h_4b8939.pth") model_type = "vit_h" device = "cuda:{}".format(device_id) if torch.cuda.is_available() else "cpu" - sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) - sam.to(device=device) + sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device=device) predictor = SamPredictor(sam) return predictor diff --git a/utils.py b/utils.py index 34e9627669d3f8976dcdeb7f9b9b2a41e00c7441..5549ce1de43f3f0d6e385506941852e503a8184d 100644 --- a/utils.py +++ b/utils.py @@ -1,84 +1,9 @@ -import os, json +import os +import json import numpy as np -import base64 -# import matplotlib.pyplot as plt import cv2 from PIL import Image -def image_grid(imgs, rows, cols): - assert len(imgs) == rows*cols - w, h = imgs[0].size - grid = Image.new('RGB', size=(cols*w, rows*h)) - grid_w, grid_h = grid.size - - for i, img in enumerate(imgs): - grid.paste(img, box=(i%cols*w, i//cols*h)) - return grid -def tensor2img(tensor): - return Image.fromarray((tensor.detach().cpu().numpy().transpose(1,2,0)*255).astype("uint8")) -def titled_image(img, title="main"): - # add caption to raw_im - from PIL import ImageDraw, ImageFont - titled_image = img.copy() - draw = ImageDraw.Draw(titled_image) - import cv2 - font_path = os.path.join(cv2.__path__[0],'qt','fonts','DejaVuSans.ttf') - font = ImageFont.truetype(font_path, size=20) - draw.text((0, 0), title, fill=(255, 0, 0), font=font) - # show the drawed image - return titled_image - -def find_image_file(shape_dir): - image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.svg', '.webp'] - processed_images = ['image_sam.png', 'input_256.png', "input_256_rgba.png"] - image_files = [file for file in os.listdir(shape_dir) if os.path.splitext(file)[1].lower() in image_extensions and file not in processed_images] - return image_files[0] - -def encode_image(filepath): - with open(filepath, 'rb') as f: - image_bytes = f.read() - encoded = str(base64.b64encode(image_bytes), 'utf-8') - return "data:image/jpg;base64,"+encoded - - -# contrast correction, rescale and recenter -def image_preprocess(shape_dir, lower_contrast=True, rescale=True): - nickname = shape_dir.split("/")[-1] - img_path = os.path.join(shape_dir, "image_sam.png") - out_path = os.path.join(shape_dir, "input_256.png") - out_path_rgba = os.path.join(shape_dir, 
"input_256_rgba.png") - image = Image.open(img_path) #[:,90:550] - # print(image.size) - image_arr = np.array(image) - in_w, in_h = image_arr.shape[:2] - - if lower_contrast: - alpha = 0.8 # Contrast control (1.0-3.0) - beta = 0 # Brightness control (0-100) - # Apply the contrast adjustment - image_arr = cv2.convertScaleAbs(image_arr, alpha=alpha, beta=beta) - image_arr[image_arr[...,-1]>200, -1] = 255 - - ret, mask = cv2.threshold(np.array(image.split()[-1]), 0, 255, cv2.THRESH_BINARY) - x, y, w, h = cv2.boundingRect(mask) - max_size = max(w, h) - print(nickname, max_size/np.max(image.size)) - ratio = 0.75 - if rescale: - side_len = int(max_size / ratio) - else: - side_len = in_w - padded_image = np.zeros((side_len, side_len, 4), dtype=np.uint8) - center = side_len//2 - padded_image[center-h//2:center-h//2+h, center-w//2:center-w//2+w] = image_arr[y:y+h, x:x+w] - rgba = Image.fromarray(padded_image).resize((256, 256), Image.LANCZOS) - rgba.save(out_path_rgba) - - rgba_arr = np.array(rgba) / 255.0 - rgb = rgba_arr[...,:3] * rgba_arr[...,-1:] + (1 - rgba_arr[...,-1:]) - rgb = Image.fromarray((rgb * 255).astype(np.uint8)) - rgb.save(out_path) - # contrast correction, rescale and recenter def image_preprocess_nosave(input_image, lower_contrast=True, rescale=True): @@ -159,9 +84,7 @@ def get_poses(init_elev): [view_theta + source for view_theta in eyelevel_theta for source in source_theta_delta]) return img_ids, calc_pose(elevations, azimuths, len(azimuths)).cpu().numpy() -# eval_path = "/objaverse-processed/zero12345_img/%s" % dataset -# for shape in os.listdir(eval_path): -# shape_dir = os.path.join(eval_path, shape) + def gen_poses(shape_dir, pose_est): img_ids, input_poses = get_poses(pose_est) @@ -178,4 +101,3 @@ def gen_poses(shape_dir, pose_est): json_path = os.path.join(shape_dir, 'pose.json') with open(json_path, 'w') as f: json.dump(out_dict, f, indent=4) - # break diff --git a/zero123_utils.py b/zero123_utils.py index 929ecd318eb984f7425356c67357ca8e59755d3b..b3ad274d47c87065ed576e1cfb803c4f741c89e6 100644 --- a/zero123_utils.py +++ b/zero123_utils.py @@ -9,7 +9,7 @@ from ldm.models.diffusion.ddim import DDIMSampler from omegaconf import OmegaConf from PIL import Image from rich import print -from transformers import AutoFeatureExtractor +from transformers import CLIPImageProcessor from torch import autocast from torchvision import transforms @@ -35,28 +35,21 @@ def load_model_from_config(config, ckpt, device, verbose=False): def init_model(device, ckpt): - import inspect - dir_path = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) - config = os.path.join(dir_path, 'configs/sd-objaverse-finetune-c_concat-256.yaml') - + config = os.path.join(os.path.dirname(__file__), 'configs/sd-objaverse-finetune-c_concat-256.yaml') config = OmegaConf.load(config) # Instantiate all models beforehand for efficiency. 
models = dict() print('Instantiating LatentDiffusion...') - models['turncam'] = load_model_from_config(config, ckpt, device=device) - # print('Instantiating Carvekit HiInterface...') - # models['carvekit'] = create_carvekit_interface() + models['turncam'] = torch.compile(load_model_from_config(config, ckpt, device=device)) print('Instantiating StableDiffusionSafetyChecker...') models['nsfw'] = StableDiffusionSafetyChecker.from_pretrained( 'CompVis/stable-diffusion-safety-checker').to(device) - print('Instantiating AutoFeatureExtractor...') - models['clip_fe'] = AutoFeatureExtractor.from_pretrained( - 'CompVis/stable-diffusion-safety-checker') + models['clip_fe'] = CLIPImageProcessor.from_pretrained( + "openai/clip-vit-large-patch14") # We multiply all by some factor > 1 to make them less likely to be triggered. - models['nsfw'].concept_embeds_weights *= 1.07 - models['nsfw'].special_care_embeds_weights *= 1.07 + models['nsfw'].concept_embeds_weights *= 1.2 + models['nsfw'].special_care_embeds_weights *= 1.2 return models @@ -74,7 +67,6 @@ def sample_model_batch(model, sampler, input_im, xs, ys, n_samples=4, precision= c = model.cc_projection(c) cond = {} cond['c_crossattn'] = [c] - # c_concat = model.encode_first_stage((input_im.to(c.device))).mode().detach() cond['c_concat'] = [model.encode_first_stage(input_im).mode().detach() .repeat(n_samples, 1, 1, 1)] if scale != 1.0: @@ -101,30 +93,6 @@ def sample_model_batch(model, sampler, input_im, xs, ys, n_samples=4, precision= del cond, c, x_samples_ddim, samples_ddim, uc, input_im torch.cuda.empty_cache() return ret_imgs - - -def predict_stage1(model, sampler, input_img_path, save_path_8, adjust_set=[], device="cuda"): - raw_im = Image.open(input_img_path) - # raw_im = raw_im.resize([256, 256], Image.LANCZOS) - # input_im_init = preprocess_image(models, raw_im, preprocess=False) - input_im_init = np.asarray(raw_im, dtype=np.float32) / 255.0 - input_im = transforms.ToTensor()(input_im_init).unsqueeze(0).to(device) - input_im = input_im * 2 - 1 - - # stage 1: 8 - delta_x_1_8 = [0] * 4 + [30] * 4 + [-30] * 4 - delta_y_1_8 = [0+90*(i%4) if i < 4 else 30+90*(i%4) for i in range(8)] + [30+90*(i%4) for i in range(4)] - - - x_samples_ddims_8 = sample_model_batch(model, sampler, input_im, delta_x_1_8, delta_y_1_8, n_samples=len(delta_x_1_8)) - for stage1_idx in range(len(x_samples_ddims_8)): - if adjust_set != [] and stage1_idx not in adjust_set: - continue - x_sample = 255.0 * rearrange(x_samples_ddims_8[stage1_idx].numpy(), 'c h w -> h w c') - Image.fromarray(x_sample.astype(np.uint8)).save(os.path.join(save_path_8, '%d.png'%(stage1_idx))) - del x_samples_ddims_8 - del input_im - torch.cuda.empty_cache() @torch.no_grad() def predict_stage1_gradio(model, raw_im, save_path = "", adjust_set=[], device="cuda", ddim_steps=75, scale=3.0): @@ -162,9 +130,6 @@ def predict_stage1_gradio(model, raw_im, save_path = "", adjust_set=[], device=" torch.cuda.empty_cache() return ret_imgs - - - def infer_stage_2(model, save_path_stage1, save_path_stage2, delta_x_2, delta_y_2, indices, device, ddim_steps=75, scale=3.0): for stage1_idx in indices: # save stage 1 image @@ -192,7 +157,6 @@ def infer_stage_2(model, save_path_stage1, save_path_stage2, delta_x_2, delta_y_ torch.cuda.empty_cache() def zero123_infer(model, input_dir_path, start_idx=0, end_idx=12, indices=None, device="cuda", ddim_steps=75, scale=3.0): - # input_dir_path = "/objaverse-processed/zero12345_img/eval/teddy_wild" # input_img_path = os.path.join(input_dir_path, "input_256.png") save_path_8 = 
os.path.join(input_dir_path, "stage1_8") save_path_8_2 = os.path.join(input_dir_path, "stage2_8")
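For reference, a hedged sketch of how the CLIPImageProcessor introduced in init_model pairs with the StableDiffusionSafetyChecker at inference time. The call convention (images as a float array in [0, 1], clip_input as preprocessed pixel values) follows the standard diffusers pipeline usage and is an assumption here, not code from this repository; the black dummy image is purely illustrative.

import numpy as np
import torch
from PIL import Image
from transformers import CLIPImageProcessor
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

device = 'cuda' if torch.cuda.is_available() else 'cpu'
clip_fe = CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14')
nsfw = StableDiffusionSafetyChecker.from_pretrained('CompVis/stable-diffusion-safety-checker').to(device)

pil_images = [Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))]       # stand-ins for sampled views
clip_input = clip_fe(pil_images, return_tensors='pt').pixel_values.to(device)
np_images = np.stack([np.asarray(im, dtype=np.float32) / 255.0 for im in pil_images])
checked_images, has_nsfw = nsfw(images=np_images, clip_input=clip_input)      # flagged images come back blacked out
print(has_nsfw)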