HaolinLiu committed
Commit 18bb538
Parent: dcad693

update files for demo

configs/finetune_triplane_diffusion.yaml CHANGED
@@ -37,7 +37,7 @@ model:
  norm: "batch"
  img_in_channels: 1280
  vit_reso: 16
- use_cat_embedding: ???
+ use_cat_embedding: False #only use category embedding when all categories are trained
  block_type: multiview_local
  par_point_encoder:
  plane_reso: 64
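
Note: use_cat_embedding toggles a learned per-category code in the diffusion model's conditioning; per the new comment it stays False for these per-category demo checkpoints. As a hedged illustration of how such a flag is typically consumed (the module and all names below are hypothetical, not the repository's actual code):

    import torch
    import torch.nn as nn

    class ConditionEncoder(nn.Module):
        # Hypothetical conditioning block gated by a use_cat_embedding flag.
        def __init__(self, use_cat_embedding=False, n_categories=6, dim=1280):
            super().__init__()
            self.use_cat_embedding = use_cat_embedding
            # allocate the embedding table only when the flag is on
            self.cat_embedding = nn.Embedding(n_categories, dim) if use_cat_embedding else None

        def forward(self, feats, category_ids):
            # feats: (B, N, dim) image tokens; category_ids: (B,) int64
            if self.use_cat_embedding:
                feats = feats + self.cat_embedding(category_ids)[:, None, :]
            return feats
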
datasets/taxonomy.py CHANGED
@@ -1,20 +1,29 @@
+ # category_map={
+ # "bathtub":0,
+ # "bed":1,
+ # "cabinet":2,
+ # "chair":3,
+ # "dishwasher":4,
+ # "fireplace":5,
+ # "oven":6,
+ # "refrigerator":7,
+ # "shelf":8,
+ # "sink":9,
+ # "sofa":10,
+ # "stool":11,
+ # "stove":12,
+ # "table":13,
+ # "toilet":14,
+ # "washer":15
+ # }
+
  category_map={
-     "bathtub":0,
-     "bed":1,
-     "cabinet":2,
-     "chair":3,
-     "dishwasher":4,
-     "fireplace":5,
-     "oven":6,
-     "refrigerator":7,
-     "shelf":8,
-     "sink":9,
-     "sofa":10,
-     "stool":11,
-     "stove":12,
-     "table":13,
-     "toilet":14,
-     "washer":15
+     "chair":0,
+     "sofa":1,
+     "table":2,
+     "cabinet":3,
+     "bed":4,
+     "shelf":5
  }

  category_map_from_synthetic={
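
Note: the new six-way category_map is what the demo dataset uses to build its category code; the snippet below mirrors the one-hot construction in demo/simple_dataset.py later in this diff:

    import torch
    from datasets.taxonomy import category_map

    category_id = category_map["table"]   # -> 2 under the six-class map
    one_hot = torch.zeros(6)              # one slot per super-category
    one_hot[category_id] = 1.0
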
demo/api.py ADDED
@@ -0,0 +1,151 @@
+ import os,sys
+ sys.path.append("..")
+ from configs.config_utils import CONFIG
+ from models import get_model
+ import torch
+ import numpy as np
+ import open3d as o3d
+ import timm
+ from PIL import Image
+ from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
+ from simple_dataset import InTheWild_Dataset,classname_remap,classname_map
+ try:
+     from torchvision.transforms import InterpolationMode
+     BICUBIC = InterpolationMode.BICUBIC
+ except ImportError:
+     BICUBIC = Image.BICUBIC
+ import mcubes
+ import trimesh
+ from torch.utils.data import DataLoader
+
+ def image_transform(n_px):
+     return Compose([
+         Resize(n_px, interpolation=BICUBIC),
+         CenterCrop(n_px),
+         ToTensor(),
+         Normalize((0.48145466, 0.4578275, 0.40821073),
+                   (0.26862954, 0.26130258, 0.27577711)),
+     ])
+
+ MAX_IMG_LENGTH=5 #take up to 5 images as inputs
+
+ ae_paths={
+     "chair":"../checkpoint/ae/chair/best-checkpoint.pth",
+     "table":"../checkpoint/ae/table/best-checkpoint.pth",
+     "cabinet":"../checkpoint/ae/cabinet/best-checkpoint.pth",
+     "shelf":"../checkpoint/ae/shelf/best-checkpoint.pth",
+     "sofa":"../checkpoint/ae/sofa/best-checkpoint.pth",
+     "bed":"../checkpoint/ae/bed/best-checkpoint.pth"
+ }
+ dm_paths={
+     "chair":"../checkpoint/finetune_dm/chair/best-checkpoint.pth",
+     "table":"../checkpoint/finetune_dm/table/best-checkpoint.pth",
+     "cabinet":"../checkpoint/finetune_dm/cabinet/best-checkpoint.pth",
+     "shelf":"../checkpoint/finetune_dm/shelf/best-checkpoint.pth",
+     "sofa":"../checkpoint/finetune_dm/sofa/best-checkpoint.pth",
+     "bed":"../checkpoint/finetune_dm/bed/best-checkpoint.pth"
+ }
+
+ def inference(ae_model,dm_model,data_batch,device,reso=256):
+     density = reso
+     gap = 2.2 / density
+     x = np.linspace(-1.1, 1.1, int(density + 1))
+     y = np.linspace(-1.1, 1.1, int(density + 1))
+     z = np.linspace(-1.1, 1.1, int(density + 1))
+     xv, yv, zv = np.meshgrid(x, y, z, indexing='ij')
+     grid = torch.from_numpy(np.stack([xv, yv, zv]).astype(np.float32)).view(3, -1).transpose(0, 1)[None].to(device,
+                                                                                                             non_blocking=True)
+     with torch.no_grad():
+         sample_input = dm_model.prepare_sample_data(data_batch)
+         sampled_array = dm_model.sample(sample_input, num_steps=36).float()
+         sampled_array = torch.nn.functional.interpolate(sampled_array, scale_factor=2, mode="bilinear")
+
+     model_ids = data_batch['model_id']
+     tran_mats = data_batch['tran_mat']
+
+     output_meshes={}
+
+     for j in range(sampled_array.shape[0]):
+         grid_list = torch.split(grid, 128 ** 3, dim=1)
+         output_list = []
+         with torch.no_grad():
+             for sub_grid in grid_list:
+                 output_list.append(ae_model.decode(sampled_array[j:j + 1], sub_grid))
+         output = torch.cat(output_list, dim=1)
+         logits = output[j].detach()
+
+         volume = logits.view(density + 1, density + 1, density + 1).cpu().numpy()
+         verts, faces = mcubes.marching_cubes(volume, 0)
+
+         verts *= gap
+         verts -= 1.1
+
+         tran_mat = tran_mats[j].numpy()
+         verts_homo = np.concatenate([verts, np.ones((verts.shape[0], 1))], axis=1)
+         verts_inwrd = np.dot(verts_homo, tran_mat.T)[:, 0:3]
+         m_inwrd = trimesh.Trimesh(verts_inwrd, faces[:, ::-1]) #transform the mesh into world coordinate
+
+         output_meshes[model_ids[j]]=m_inwrd
+     return output_meshes
+
+ if __name__=="__main__":
+     import argparse
+     parser=argparse.ArgumentParser()
+     parser.add_argument("--data_dir", type=str, default="../example_process_data")
+     parser.add_argument('--scene_id', default="all", type=str)
+     parser.add_argument("--save_dir", type=str,default="../example_output_data")
+     args = parser.parse_args()
+
+     config_path="../configs/finetune_triplane_diffusion.yaml"
+     config=CONFIG(config_path).config
+
+     '''creating save folder'''
+     save_folder=os.path.join(args.save_dir,args.scene_id)
+     os.makedirs(save_folder,exist_ok=True)
+
+     '''prepare model'''
+     device=torch.device("cuda")
+     ae_config=config['model']['ae']
+     dm_config=config['model']['dm']
+     dm_model=get_model(dm_config).to(device)
+     ae_model=get_model(ae_config).to(device)
+     dm_model.eval()
+     ae_model.eval()
+
+     '''preparing data'''
+     '''find out how many classes are there in the whole scene'''
+     images_folder=os.path.join(args.data_dir,args.scene_id,"6_images")
+     object_id_list=os.listdir(images_folder)
+     object_class_list=[item.split("_")[0] for item in object_id_list]
+     all_object_class=list(set(object_class_list))
+
+     exist_super_categories=[]
+     for object_class in all_object_class:
+         if object_class not in classname_remap:
+             continue
+         else:
+             exist_super_categories.append(classname_remap[object_class]) #find which category specific models should be employed
+     exist_super_categories=list(set(exist_super_categories))
+     for super_category in exist_super_categories:
+         print("processing %s"%(super_category))
+         ae_ckpt=torch.load(ae_paths[super_category],map_location="cpu")["model"]
+         dm_ckpt=torch.load(dm_paths[super_category],map_location="cpu")["model"]
+         ae_model.load_state_dict(ae_ckpt)
+         dm_model.load_state_dict(dm_ckpt)
+         dataset = InTheWild_Dataset(data_dir=args.data_dir, scene_id=args.scene_id, category=super_category, max_n_imgs=5)
+         dataloader=DataLoader(
+             dataset=dataset,
+             num_workers=1,
+             batch_size=1,
+             shuffle=False
+         )
+         for data_batch in dataloader:
+             output_meshes=inference(ae_model,dm_model,data_batch,device)
+             #print(output_meshes)
+             for model_id in output_meshes:
+                 mesh=output_meshes[model_id]
+                 save_path=os.path.join(save_folder,model_id+".ply")
+                 print("saving to %s"%(save_path))
+                 mesh.export(save_path)
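
Note: inference() evaluates occupancy on a dense (reso+1)^3 grid spanning [-1.1, 1.1] and splits the queries so each decode call stays within memory; for the default reso=256 the arithmetic works out as follows (a standalone check, not part of the demo code):

    import numpy as np

    reso = 256
    gap = 2.2 / reso                           # grid spacing, matches "verts *= gap"
    n_pts = (reso + 1) ** 3                    # 16,974,593 query points per object
    n_chunks = int(np.ceil(n_pts / 128 ** 3))  # torch.split yields 9 decode calls
    print(gap, n_pts, n_chunks)
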
demo/extract_vit_features.py ADDED
@@ -0,0 +1,57 @@
+ import os,sys
+ sys.path.append("..")
+ import numpy
+ from simple_dataset import Simple_InTheWild_dataset
+ import argparse
+ from torch.utils.data import DataLoader
+ import timm
+ import torch
+ import numpy as np
+ from util import misc
+
+ parser=argparse.ArgumentParser()
+ parser.add_argument("--data_dir",type=str,default="../example_process_data")
+ parser.add_argument('--world_size', default=1, type=int,
+                     help='number of distributed processes')
+ parser.add_argument('--local_rank', default=-1, type=int)
+ parser.add_argument('--dist_on_itp', action='store_true')
+ parser.add_argument('--dist_url', default='env://',
+                     help='url used to set up distributed training')
+ parser.add_argument('--scene_id',default="all",type=str)
+ args=parser.parse_args()
+
+
+ misc.init_distributed_mode(args)
+ dataset=Simple_InTheWild_dataset(dataset_dir=args.data_dir,scene_id=args.scene_id,n_px=224)
+ num_tasks = misc.get_world_size()
+ global_rank = misc.get_rank()
+ print(num_tasks,global_rank)
+ sampler = torch.utils.data.DistributedSampler(
+     dataset, num_replicas=num_tasks, rank=global_rank,
+     shuffle=False)  # shuffle=True to reduce monitor bias
+
+ dataloader=DataLoader(
+     dataset,
+     sampler=sampler,
+     batch_size=10,
+     num_workers=4,
+     pin_memory=True,
+     drop_last=False
+ )
+ VIT_MODEL = 'vit_huge_patch14_224_clip_laion2b'
+ model=timm.create_model(VIT_MODEL, pretrained=True,pretrained_cfg_overlay=dict(file="./open_clip_pytorch_model.bin"))
+ model=model.eval().float().cuda()
+ for idx,data_batch in enumerate(dataloader):
+     if idx%10==0:
+         print("{}/{}".format(dataloader.__len__(),idx))
+     images = data_batch["images"].cuda().float()
+     model_id= data_batch["model_id"]
+     image_name=data_batch["image_name"]
+     scene_id=data_batch["scene_id"]
+     with torch.no_grad():
+         output_features=model.forward_features(images)
+     for j in range(output_features.shape[0]):
+         save_folder=os.path.join(args.data_dir,scene_id[j],"7_img_feature",model_id[j])
+         os.makedirs(save_folder,exist_ok=True)
+         save_path=os.path.join(save_folder,image_name[j]+".npz")
+         np.savez_compressed(save_path,img_features=output_features[j].detach().cpu().numpy().astype(np.float32))
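
Note: for vit_huge_patch14_224_clip_laion2b, forward_features should return one class token plus 224/14 = 16x16 = 256 patch tokens at embed dim 1280, matching the config's vit_reso: 16 and img_in_channels: 1280 above. A quick sanity check (the path is a placeholder):

    import numpy as np

    feat = np.load("<scene>/7_img_feature/<model_id>/<image_name>.npz")["img_features"]
    print(feat.shape)  # expected (257, 1280): 1 class token + 256 patch tokens
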
demo/process_data.py ADDED
@@ -0,0 +1,340 @@
+ import numpy as np
+ import os
+ import argparse
+ import open3d as o3d
+ import glob
+ import cv2
+ import copy
+
+ def get_roll_rot(angle):
+     ca=np.cos(angle)
+     sa=np.sin(angle)
+     rot=np.array([
+         [ca,-sa,0,0],
+         [sa,ca,0,0],
+         [0,0,1,0],
+         [0,0,0,1]
+     ])
+     return rot
+ def rotate_mat(direction):
+     if direction == 'Up':
+         return np.eye(4)
+     elif direction == 'Left':
+         rot_mat=get_roll_rot(np.pi/2)
+     elif direction == 'Right':
+         rot_mat=get_roll_rot(-np.pi/2)
+     elif direction == 'Down':
+         rot_mat=get_roll_rot(np.pi)
+     else:
+         raise Exception(f'No such direction (={direction}) rotation')
+     return rot_mat
+
+ def rotate_K(K,direction):
+     if direction == 'Up' or direction=="Down":
+         new_K4=np.eye(4)
+         new_K4[0:3,0:3]=copy.deepcopy(K)
+         return new_K4
+     elif direction == 'Left' or direction =="Right":
+         fx,fy,cx,cy=K[0,0],K[1,1],K[0,2],K[1,2]
+         new_K4 = np.array([
+             [fy, 0, cy, 0],
+             [0, fx, cx, 0],
+             [0, 0, 1, 0],
+             [0, 0, 0, 1]
+         ])
+         return new_K4
+
+ def rotate_bbox(bbox,direction, H,W):
+
+     x_min,y_min,x_max,y_max=bbox[0:4]
+     if direction == 'Up':
+         return bbox
+     elif direction == 'Left':
+         #print(W-bbox[1],W-bbox[3])
+         new_bbox=[min(H-bbox[1],H-bbox[3]),bbox[0],max(H-bbox[1],H-bbox[3]),bbox[2]]
+     elif direction == 'Right':
+         new_bbox=[bbox[1],min(W-bbox[0],W-bbox[2]),bbox[3],max(W-bbox[0],W-bbox[2])]
+     elif direction == 'Down':
+         new_bbox=[min(W-x_min,W-x_max),min(H-y_min,H-y_max),max(W-x_min,W-x_max),max(H-y_min,H-y_max)]
+     else:
+         raise Exception(f'No such direction (={direction}) rotation')
+     return new_bbox
+
+ def rotate_image(img, direction):
+     if direction == 'Up':
+         pass
+     elif direction == 'Left':
+         img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
+     elif direction == 'Right':
+         img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
+     elif direction == 'Down':
+         img = cv2.rotate(img, cv2.ROTATE_180)
+     else:
+         raise Exception(f'No such direction (={direction}) rotation')
+     return img
+
+ parser=argparse.ArgumentParser()
+ parser.add_argument("--data_folder",type=str,required=True)
+ parser.add_argument("--save_dir",type=str,default=r"../example_process_data")
+ parser.add_argument("--debug",action="store_true",default=False)
+ args=parser.parse_args()
+
+ print("processing %s"%(args.data_folder))
+
+ data_folder=args.data_folder
+ scene_name=os.path.basename(data_folder)
+ save_folder=os.path.join(args.save_dir,scene_name)
+ os.makedirs(save_folder,exist_ok=True)
+ color_folder=os.path.join(data_folder,"color")
+ depth_folder=os.path.join(data_folder,"depth")
+ pose_folder=os.path.join(data_folder,"pose")
+
+ print(color_folder)
+
+ color_list=glob.glob(color_folder+"/*.jpg")
+ image_id_list=[os.path.basename(item)[0:-4] for item in color_list]
+ image_id_list.sort()
+
+ bbox_path=os.path.join(data_folder,"objects.npy")
+ bboxes_dict=np.load(bbox_path,allow_pickle=True).item()
+
+ intrinsic_path=os.path.join(data_folder,"intrinsic","intrinsic_color.txt")
+ K=np.loadtxt(intrinsic_path)
+
+ align_path=os.path.join(data_folder,"alignment_matrix.txt")
+ align_matrix=np.loadtxt(align_path)
+ if align_matrix.shape[0]==3:
+     new_align_matrix=np.eye(4)
+     new_align_matrix[0:3,0:3]=align_matrix
+     align_matrix=new_align_matrix
+
+ mesh_path=os.path.join(data_folder,"fused_mesh.ply")
+ o3d_mesh=o3d.io.read_triangle_mesh(mesh_path)
+ o3d_vertices = np.array(o3d_mesh.vertices)
+ o3d_vert_homo=np.concatenate([o3d_vertices,np.ones([o3d_vertices.shape[0],1])],axis=1)
+ align_o3d_vertices = np.dot(o3d_vert_homo,align_matrix)[:,0:3]
+ o3d_mesh.vertices = o3d.utility.Vector3dVector(align_o3d_vertices)
+ align_mesh_save_path=os.path.join(save_folder,"align_mesh.ply")
+ o3d.io.write_triangle_mesh(align_mesh_save_path,o3d_mesh)
+
+ x=np.linspace(-1,1,10)
+ y=np.linspace(-1,1,10)
+ z=np.linspace(-1,1,10)
+ X,Y,Z=np.meshgrid(x,y,z,indexing='ij')
+ vox_coor=np.concatenate([X[:,:,:,np.newaxis],Y[:,:,:,np.newaxis],Z[:,:,:,np.newaxis]],axis=-1)
+ vox_coor=np.reshape(vox_coor,(-1,3))
+ #print(np.amin(vox_coor,axis=0),np.amax(vox_coor,axis=0))
+
+ pre_proj_mates={}
+ obj_points_dict={}
+ trans_mats={}
+ point_save_folder=os.path.join(save_folder,"5_partial_points")
+ os.makedirs(point_save_folder,exist_ok=True)
+ tran_save_folder=os.path.join(save_folder,"10_tran_matrix")
+ os.makedirs(tran_save_folder,exist_ok=True)
+ for object_id in bboxes_dict:
+     object = bboxes_dict[object_id]
+     category = object['category']
+     sizes = object['size']
+     sizes *= 1.1
+     transform_matrix_t = np.array(object['transform']).reshape([4, 4])
+     translate = transform_matrix_t[:3, 3]
+     rotation = transform_matrix_t[:3, :3]
+
+     bbox_o3d = o3d.geometry.OrientedBoundingBox(translate.reshape([3, 1]),
+                                                 rotation,
+                                                 np.array(sizes).reshape([3, 1]))
+     crop_pcd = o3d_mesh.crop(bbox_o3d)
+     crop_vert = np.asarray(crop_pcd.vertices)
+     org_crop_vert = crop_vert[:, :]
+     crop_vert = crop_vert - translate
+     crop_vert = np.dot(crop_vert,np.linalg.inv(rotation).T)
+     crop_vert[:, 2] *= -1
+     bb_min, bb_max = np.amin(crop_vert, axis=0), np.amax(crop_vert, axis=0)
+     max_length = (bb_max - bb_min).max()
+     center = (bb_max + bb_min) / 2
+     crop_vert = (crop_vert - center) / max_length * 2
+
+     obj_points_dict[object_id]=crop_vert
+     crop_pcd.vertices=o3d.utility.Vector3dVector(crop_vert)
+     save_path=os.path.join(point_save_folder,category+"_%d.ply"%(object_id))
+     o3d.io.write_triangle_mesh(save_path,crop_pcd)
+
+     proj_mat = np.eye(4)
+     scale_tran = np.eye(4)
+     scale_tran[0, 0], scale_tran[1, 1], scale_tran[2, 2] = max_length / 2, max_length / 2, max_length / 2
+     proj_mat = np.dot(proj_mat, scale_tran)
+     center_tran = np.eye(4)
+     center_tran[0:3, 3] = center
+     proj_mat = np.dot(center_tran, proj_mat)
+     invert_mat = np.eye(4)
+     invert_mat[2, 2] *= -1
+     proj_mat = np.dot(invert_mat, proj_mat)
+     proj_mat[0:3, 0:3] = np.dot(rotation,proj_mat[0:3, 0:3])
+     translate_mat = np.eye(4)
+     translate_mat[0:3, 3] = translate
+     proj_mat = np.dot(translate_mat, proj_mat)
+
+     '''tran mat is to align output to scene space'''
+     tran_mat=copy.deepcopy(proj_mat)
+     trans_mats[object_id]=tran_mat
+     tran_save_path=os.path.join(tran_save_folder,category+"_%d.npy"%(object_id))
+     np.save(tran_save_path,tran_mat)
+
+     unalign_mat = np.linalg.inv(align_matrix)
+     proj_mat = np.dot(unalign_mat.T, proj_mat)
+     pre_proj_mates[object_id]=proj_mat
+
+ ref=np.array([
+     [0,1.0], #Up
+     [-1.0,0],#Left
+     [0,1.0], #Right
+     [0.0,-1.0] #Down
+ ]) #4*2
+ dir_list=[
+     "Down",
+     "Left",
+     "Right",
+     "Up"
+ ]
+
+ for image_id in image_id_list:
+     color_path=os.path.join(color_folder,image_id+".jpg")
+     depth_path=os.path.join(depth_folder,image_id+".png")
+     pose_path=os.path.join(pose_folder,image_id+".txt")
+
+     color=cv2.imread(color_path)
+     height,width=color.shape[0:2]
+     depth=cv2.imread(depth_path,cv2.IMREAD_ANYCOLOR|cv2.IMREAD_ANYDEPTH)/1000.0
+     pose=np.loadtxt(pose_path)
+     for object_id in bboxes_dict:
+         object=bboxes_dict[object_id]
+         category=object['category']
+         sizes=object['size']
+         object_vox_coor=vox_coor*sizes[np.newaxis,:]
+         #print(np.amin(object_vox_coor,axis=0),np.amax(object_vox_coor,axis=0))
+         #print(sizes)
+
+         prev_proj_mat=pre_proj_mates[object_id]
+         wrd2cam_pose = np.linalg.inv(pose)
+         current_proj_mat = np.dot(wrd2cam_pose, prev_proj_mat)
+         K4=np.eye(4)
+         K4[0:3,0:3]=K
+
+         '''calibrate proj_mat'''
+         up_vectors = np.array([[0, 0, 0, 1.0],
+                                [0, 0.5, 0, 1.0]])
+         up_vec_inimg = np.dot(up_vectors, current_proj_mat.T)
+         up_vec_inimg = np.dot(up_vec_inimg,K4.T)
+         up_x = up_vec_inimg[:, 0] / up_vec_inimg[:, 2]
+         up_y = up_vec_inimg[:, 1] / up_vec_inimg[:, 2]
+         pt1 = np.array((up_x[0], up_y[0]))
+         pt2 = np.array((up_x[1], up_y[1]))
+         up_dir = pt2 - pt1
+         # print(up_dir)
+
+         product = np.sum(up_dir[np.newaxis, :] * ref, axis=1)
+         max_ind = np.argmax(product)
+         direction = dir_list[max_ind]
+         sky_rot = rotate_mat(direction)
+         #final_proj_mat = np.dot(K4,final_proj_mat)
+
+         vox_homo=np.concatenate([object_vox_coor,np.ones((object_vox_coor.shape[0],1))],axis=1)
+         vox_proj=np.dot(vox_homo,current_proj_mat.T)
+         vox_proj=np.dot(vox_proj,K4.T)
+         vox_x=vox_proj[:,0]/vox_proj[:,2]
+         vox_y=vox_proj[:,1]/vox_proj[:,2]
+
+         if np.mean(vox_proj[:,2])>5:
+             continue
+
+         inside_mask=((vox_x<width-1) &(vox_x>0) &(vox_y<height-1) &(vox_y>0)).astype(np.float32)
+         infrustum_ratio=np.sum(inside_mask)/vox_x.shape[0]
+         if infrustum_ratio < 0.4 and category in ["chair", "stool"]:
+             continue
+         elif infrustum_ratio <0.2:
+             continue
+         #print(object_id,image_id,infrustum_ratio)
+
+         '''objects visibility check for every frame'''
+         vox_x_inside=vox_x[inside_mask>0].astype(np.int32)
+         vox_y_inside=vox_y[inside_mask>0].astype(np.int32)
+         vox_depth=vox_proj[inside_mask>0,2]
+         #print(depth.shape,np.amax(vox_y_inside),np.amax(vox_x_inside))
+         depth_sample=depth[vox_y_inside,vox_x_inside]
+         depth_mask=(depth_sample>0)&(depth_sample<10.0)
+         depth_sample=depth_sample[depth_mask]
+         vox_depth=vox_depth[depth_mask]
+
+         if vox_depth.shape[0]<100:
+             continue
+
+         occluded_ratio=np.sum(((vox_depth-depth_sample)>0.2).astype(np.float32))/vox_depth.shape[0]
+         if occluded_ratio>0.6 and category in ["chair"]: #chair is easily occluded, while table is not
+             continue
+
+         depth_near_ratio = np.sum((np.abs(vox_depth - depth_sample) < sizes.max() * 0.5).astype(np.float32)) / \
+                            vox_depth.shape[0]
+         if depth_near_ratio < 0.2:
+             continue
+
+         '''make sure in every image, the object is upward'''
+         bbox=(np.amin(vox_x_inside),np.amin(vox_y_inside),np.amax(vox_x_inside),np.amax(vox_y_inside))
+         rot_image=rotate_image(color,direction)
+         bbox = rotate_bbox(bbox, direction, height, width)
+         crop_image=rot_image[bbox[1]:bbox[3],bbox[0]:bbox[2]]
+         crop_h, crop_w = crop_image.shape[0:2]
+         max_length = max(crop_h, crop_w)
+         if max_length<100:
+             continue
+         pad_image = np.zeros((max_length, max_length, 3))
+         if crop_h > crop_w:
+             margin = crop_h - crop_w
+             pad_image[:, margin // 2:margin // 2 + crop_w] = crop_image[:, :, :]
+             x_start, x_end = bbox[0] - margin // 2, margin // 2 + bbox[2]
+             y_start, y_end = bbox[1], bbox[3]
+         else:
+             margin = crop_w - crop_h
+             pad_image[margin // 2:margin // 2 + crop_h, :] = crop_image[:, :, :]
+
+             y_start, y_end = bbox[1] - margin // 2, bbox[3] + margin // 2
+             x_start, x_end = bbox[0], bbox[2]
+
+         pad_image=cv2.resize(pad_image,dsize=(224,224),interpolation=cv2.INTER_LINEAR)
+         image_save_folder = os.path.join(save_folder, "6_images", category + "_%d" % (object_id))
+         os.makedirs(image_save_folder, exist_ok=True)
+         image_save_path=os.path.join(image_save_folder,image_id+".jpg")
+         #print("saving to %s"%(image_save_path))
+         cv2.imwrite(image_save_path,pad_image)
+
+         proj_mat=np.dot(sky_rot,current_proj_mat)
+         new_K4 = rotate_K(K, direction)
+         new_K4[0, 2] -= x_start
+         new_K4[1, 2] -= y_start
+         new_K4[0] = new_K4[0] / max_length * 224
+         new_K4[1] = new_K4[1] / max_length * 224
+         proj_mat = np.dot(new_K4, proj_mat)
+
+         proj_save_folder=os.path.join(save_folder,"8_proj_matrix",category+"_%d"%(object_id))
+         os.makedirs(proj_save_folder,exist_ok=True)
+         proj_save_path=os.path.join(proj_save_folder,image_id+".npy")
+         np.save(proj_save_path,proj_mat)
+
+         '''debug proj matrix'''
+         if args.debug:
+             proj_save_folder=os.path.join(save_folder,"9_proj_images",category+"_%d"%(object_id))
+             os.makedirs(proj_save_folder,exist_ok=True)
+             canvas=copy.deepcopy(pad_image)
+             par_points=obj_points_dict[object_id]
+             par_homo=np.concatenate([par_points,np.ones((par_points.shape[0],1))],axis=1)
+             par_inimg=np.dot(par_homo,proj_mat.T)
+             x=par_inimg[:,0]/par_inimg[:,2]
+             y=par_inimg[:,1]/par_inimg[:,2]
+             x=np.clip(x,a_min=0,a_max=223).astype(np.int32)
+             y=np.clip(y,a_min=0,a_max=223).astype(np.int32)
+             canvas[y,x]=np.array([[0,255,0]])
+             proj_save_path=os.path.join(proj_save_folder,image_id+".jpg")
+             cv2.imwrite(proj_save_path,canvas)
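
Note: gathering the paths used above, process_data.py expects one capture folder per scene as input and writes the numbered folders consumed by the rest of the demo:

    <data_folder>/                     (input scene)
        color/*.jpg    depth/*.png    pose/*.txt
        intrinsic/intrinsic_color.txt
        alignment_matrix.txt    fused_mesh.ply    objects.npy
    <save_dir>/<scene_name>/           (output)
        align_mesh.ply
        5_partial_points/    6_images/    8_proj_matrix/    10_tran_matrix/
        9_proj_images/       (projection visualizations, only with --debug)
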
demo/simple_dataset.py ADDED
@@ -0,0 +1,182 @@
+ import torch
+ import torch.nn as nn
+ from torch.utils import data
+ import os
+ from PIL import Image
+ from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
+ try:
+     from torchvision.transforms import InterpolationMode
+     BICUBIC = InterpolationMode.BICUBIC
+ except ImportError:
+     BICUBIC = Image.BICUBIC
+ import glob
+ import numpy as np
+ import open3d as o3d
+ import cv2
+ from datasets.taxonomy import category_map as category_ids
+
+ classname_map={
+     "chair":["chair","stool"],
+     "cabinet":["dishwasher","cabinet","oven","refrigerator",'storage'],
+     "sofa":["sofa"],
+     "table":["table"],
+     "bed":["bed"],
+     "shelf":["shelf"]
+ }
+ classname_remap={ #map small categories to six large categories
+     "chair":"chair",
+     "stool":"chair",
+     "dishwasher":"cabinet",
+     "cabinet":"cabinet",
+     "oven":"cabinet",
+     "refrigerator":"cabinet",
+     "storage":"cabinet",
+     "sofa":"sofa",
+     "table":"table",
+     "bed":"bed",
+     "shelf":"shelf"
+ }
+
+ def image_transform(n_px):
+     return Compose([
+         Resize(n_px, interpolation=BICUBIC),
+         CenterCrop(n_px),
+         ToTensor(),
+         Normalize((0.48145466, 0.4578275, 0.40821073),
+                   (0.26862954, 0.26130258, 0.27577711)),
+     ])
+ class Simple_InTheWild_dataset(data.Dataset):
+     def __init__(self,dataset_dir="/data1/haolin/data/real_scene_process_data",scene_id="letian-310",n_px=224):
+         self.dataset_dir=dataset_dir
+         self.preprocess = image_transform(n_px)
+         self.image_path = []
+         if scene_id=="all":
+             scene_list=os.listdir(self.dataset_dir)
+             for id in scene_list:
+                 image_folder=os.path.join(self.dataset_dir,id,"6_images")
+                 self.image_path+=glob.glob(image_folder+"/*/*jpg")
+         else:
+             image_folder = os.path.join(self.dataset_dir, scene_id, "6_images")
+             self.image_path += glob.glob(image_folder + "/*/*jpg")
+     def __len__(self):
+         return len(self.image_path)
+
+     def __getitem__(self,index):
+         path=self.image_path[index]
+         basename=os.path.basename(path)[:-4]
+         model_id=path.split(os.sep)[-2]
+         scene_id=path.split(os.sep)[-4]
+         image=Image.open(path)
+         image_tensor=self.preprocess(image)
+
+         return {"images":image_tensor,"image_name":basename,"model_id":model_id,"scene_id":scene_id}
+
+ class InTheWild_Dataset(data.Dataset):
+     def __init__(self,data_dir="/data1/haolin/data/real_scene_process_data/letian-310",scene_id="letian-310",
+                  par_pc_size=2048,category="chair",max_n_imgs=5):
+         self.par_pc_size=par_pc_size
+         self.data_dir=data_dir
+         self.category=category
+         self.max_n_imgs=max_n_imgs
+
+         self.models=[]
+         category_list=classname_map[category]
+         modelid_list=[]
+         for cat in category_list:
+             if scene_id=="all":
+                 scene_list=os.listdir(self.data_dir)
+                 for id in scene_list:
+                     data_folder=os.path.join(self.data_dir,id)
+                     modelid_list+=glob.glob(data_folder+"/6_images/%s*"%(cat))
+             else:
+                 data_folder=os.path.join(self.data_dir,scene_id)
+                 modelid_list+=glob.glob(data_folder+"/6_images/%s*"%(cat))
+         sceneid_list = [item.split("/")[-3] for item in modelid_list]
+         modelid_list=[item.split("/")[-1] for item in modelid_list]
+         for idx,modelid in enumerate(modelid_list):
+             scene_id=sceneid_list[idx]
+             image_folder=os.path.join(self.data_dir,scene_id,"6_images",modelid)
+             image_list=os.listdir(image_folder)
+             if len(image_list)==0:
+                 continue
+             imageid_list=[item[0:-4] for item in image_list]
+             imageid_list.sort(key=lambda x:int(x))
+             partial_path=os.path.join(self.data_dir,scene_id,"5_partial_points",modelid+".ply")
+             if os.path.exists(partial_path)==False: continue
+             self.models+=[
+                 {'model_id':modelid,
+                  "scene_id":scene_id,
+                  "partial_path":partial_path,
+                  "imageid_list":imageid_list,
+                  }
+             ]
+     def __len__(self):
+         return len(self.models)
+
+     def __getitem__(self,idx):
+         model = self.models[idx]['model_id']
+         scene_id=self.models[idx]['scene_id']
+         imageid_list = self.models[idx]['imageid_list']
+         partial_path=self.models[idx]['partial_path']
+         n_frames=min(len(imageid_list),self.max_n_imgs)
+         img_indexes=np.linspace(start=0,stop=len(imageid_list)-1,num=n_frames).astype(np.int32)
+
+         '''load partial points'''
+         par_point_o3d = o3d.io.read_point_cloud(partial_path)
+         par_points = np.asarray(par_point_o3d.points)
+         replace = par_points.shape[0] < self.par_pc_size
+         ind = np.random.default_rng().choice(par_points.shape[0], self.par_pc_size, replace=replace)
+         par_points=par_points[ind]
+         par_points=torch.from_numpy(par_points).float()
+
+         '''load image features'''
+         image_list=[]
+         valid_frames = []
+         image_namelist=[]
+         for img_index in img_indexes:
+             image_name = imageid_list[img_index]
+             image_feat_path = os.path.join(self.data_dir,scene_id, "7_img_feature", model,image_name + '.npz')
+             image = np.load(image_feat_path)["img_features"]
+             image_list.append(torch.from_numpy(image).float())
+             image_namelist.append(image_name)
+             valid_frames.append(True)
+         '''load original image'''
+         org_img_list=[]
+         for img_index in img_indexes:
+             image_name = imageid_list[img_index]
+             image_path = os.path.join(self.data_dir,scene_id, "6_images", model,image_name+".jpg")
+             org_image = cv2.imread(image_path)
+             org_image = cv2.resize(org_image, dsize=(224, 224), interpolation=cv2.INTER_LINEAR)
+             org_img_list.append(org_image)
+
+         '''load project matrix'''
+         proj_mat_list=[]
+         for img_index in img_indexes:
+             image_name = imageid_list[img_index]
+             proj_mat_path = os.path.join(self.data_dir,scene_id, "8_proj_matrix", model, image_name + ".npy")
+             proj_mat = np.load(proj_mat_path)
+             proj_mat_list.append(proj_mat)
+
+         '''load transformation matrix'''
+         tran_mat_path = os.path.join(self.data_dir,scene_id, "10_tran_matrix", model+".npy")
+         tran_mat = np.load(tran_mat_path)
+
+         '''category code, not used for category specific models'''
+         category_id = category_ids[self.category]
+         one_hot = torch.zeros((6)).float()
+         one_hot[category_id] = 1.0
+
+         ret_dict={
+             "model_id":model,
+             "scene_id":scene_id,
+             "par_points":par_points,
+             "proj_mat":torch.stack([torch.from_numpy(mat) for mat in proj_mat_list], dim=0),
+             "tran_mat":torch.from_numpy(tran_mat).float(),
+             "image":torch.stack(image_list,dim=0),
+             "org_image":org_img_list,
+             "valid_frames":torch.tensor(valid_frames).bool(),
+             "category_ids": category_id,
+             "category_code":one_hot,
+         }
+         return ret_dict
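
Note: with batch_size=1 as in demo/api.py, one batch from InTheWild_Dataset carries the shapes below (a sketch assuming processed example data on disk; the 257x1280 token shape comes from the ViT-H/14 features written by extract_vit_features.py):

    from torch.utils.data import DataLoader
    from simple_dataset import InTheWild_Dataset

    dataset = InTheWild_Dataset(data_dir="../example_process_data", scene_id="all",
                                category="chair", max_n_imgs=5)
    batch = next(iter(DataLoader(dataset, batch_size=1)))
    print(batch["par_points"].shape)     # (1, 2048, 3): sampled partial points
    print(batch["image"].shape)          # (1, n_imgs, 257, 1280): per-view ViT tokens
    print(batch["proj_mat"].shape)       # (1, n_imgs, 4, 4): object-to-image projections
    print(batch["tran_mat"].shape)       # (1, 4, 4): output-to-scene transform
    print(batch["category_code"].shape)  # (1, 6): one-hot over the six super-categories
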
train_VAE.sh CHANGED
@@ -12,4 +12,4 @@ train_triplane_vae.py \
  --clip_grad 0.35 \
  --category chair \
  --data-pth ../data \
- --replica 5
+ --replica 5
util/misc.py CHANGED
@@ -15,7 +15,7 @@ from pathlib import Path
  import torch
  import torch.distributed as dist
  #from torch._six import inf
- import inf
+ import math
  import numpy as np

  def log_codefiles(data_root,save_root):
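
Note: torch._six (and its inf) was removed in recent PyTorch releases, so the bare "import inf" left behind when the original import was commented out could never resolve; the standard-library constant is the drop-in replacement (assuming uses elsewhere in the file read math.inf):

    import math
    norm_type = math.inf  # replaces the former "from torch._six import inf"
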
util/simple_image_loader.py CHANGED
@@ -16,12 +16,8 @@ def image_transform(n_px):
      Resize(n_px, interpolation=BICUBIC),
      CenterCrop(n_px),
      ToTensor(),
-     # Normalize((123.675/255.0,116.28/255.0,103.53/255.0),
-     #           (58.395/255.0,57.12/255.0,57.375/255.0))
      Normalize((0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711)),
-     # Normalize((0.5, 0.5, 0.5),
-     #           (0.5, 0.5, 0.5)),
  ])

  class Image_dataset(data.Dataset):