|
|
|
import os |
|
import torch |
|
import argparse |
|
import numpy as np |
|
import open3d as o3d |
|
from huggingface_hub import hf_hub_download, HfFolder |
|
|
|
from segment import seg_point, seg_box, seg_mask |
|
import sam2point.dataset as dataset |
|
import sam2point.configs as configs |
|
from sam2point.voxelizer import Voxelizer |
|
from sam2point.utils import cal |
|
import matplotlib.pyplot as plt |
|
import plotly.graph_objects as go |
|
|
|
print("Torch CUDA:", torch.cuda.is_available()) |
|
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__() |
|
|
|
def run_demo(dataset_name, prompt_type, sample_idx, prompt_idx, voxel_size, theta, mode, ret_prompt): |
|
parser = argparse.ArgumentParser() |
|
parser.add_argument('--dataset', choices=['S3DIS', 'ScanNet', 'Objaverse', 'KITTI', 'Semantic3D'], default='Objaverse', help='dataset selected') |
|
parser.add_argument('--prompt_type', choices=['point', 'box', 'mask'], default='point', help='prompt type selected') |
|
parser.add_argument('--sample_idx', type=int, default=2, help='the index of the scene or object') |
|
parser.add_argument('--prompt_idx', type=int, default=0, help='the index of the prompt') |
|
parser.add_argument('--voxel_size', type=float, default=0.02, help='voxel size') |
|
parser.add_argument('--theta', type=float, default=0.5) |
|
parser.add_argument('--mode', type=str, default='bilinear') |
|
parser.add_argument("--ret_prompt", action="store_true") |
|
args = parser.parse_args() |
|
args.dataset, args.prompt_type, args.sample_idx, args.prompt_idx = dataset_name, prompt_type, sample_idx, prompt_idx |
|
args.voxel_size, args.theta, args.mode, args.ret_prompt = voxel_size, theta, mode, ret_prompt |
|
print(args) |
|
|
|
name_list = [args.dataset, "sample" + str(args.sample_idx), args.prompt_type + "-prompt" + str(args.prompt_idx)] |
|
name = '_'.join(name_list) |
|
|
|
|
|
repo_id = "ZiyuG/Cache" |
|
result_name = "cache_results/" + name + '.npy' |
|
prompt_name = "cache_prompt/" + name + '.npy' |
|
token = os.getenv('HF_TOKEN') |
|
|
|
try: |
|
result_file = hf_hub_download(repo_id=repo_id, filename=result_name, use_auth_token=token, repo_type='dataset') |
|
prompt_file = hf_hub_download(repo_id=repo_id, filename=prompt_name, use_auth_token=token, repo_type='dataset') |
|
new_color = np.load(result_file) |
|
PROMPT = np.load(prompt_file) |
|
if not args.ret_prompt: return new_color, PROMPT |
|
else: return PROMPT |
|
except Exception as e: |
|
if os.path.exists("./cache_results/" + name + '.npy') and os.path.exists("./cache_prompt/" + name + '.npy'): |
|
new_color = np.load("./cache_results/" + name + '.npy') |
|
PROMPT = np.load("./cache_prompt/" + name + '.npy') |
|
if not args.ret_prompt: return new_color, PROMPT |
|
else: return PROMPT |
|
|
|
if args.dataset == 'S3DIS': |
|
info = configs.S3DIS_samples[args.sample_idx] |
|
|
|
if args.prompt_type == 'point' and args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
elif args.prompt_type == 'box' and args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
point, color = dataset.load_S3DIS_sample(info['path']) |
|
elif args.dataset == 'ScanNet': |
|
info = configs.ScanNet_samples[args.sample_idx] |
|
|
|
if args.prompt_type == 'point' and args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
elif args.prompt_type == 'box' and args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
point, color = dataset.load_ScanNet_sample(info['path']) |
|
elif args.dataset == 'Objaverse': |
|
info = configs.Objaverse_samples[args.sample_idx] |
|
|
|
if args.prompt_type == 'point' and args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
elif args.prompt_type == 'box' and args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
point, color = dataset.load_Objaverse_sample(info['path']) |
|
args.voxel_size = info[configs.VOXEL[args.prompt_type]][args.prompt_idx] |
|
elif args.dataset == 'KITTI': |
|
info = configs.KITTI_samples[args.sample_idx] |
|
|
|
if args.prompt_type == 'point' and args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
elif args.prompt_type == 'box' and args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
point, color = dataset.load_KITTI_sample(info['path']) |
|
args.voxel_size = info[configs.VOXEL[args.prompt_type]][args.prompt_idx] |
|
elif args.dataset == 'Semantic3D': |
|
info = configs.Semantic3D_samples[args.sample_idx] |
|
|
|
if args.prompt_type == 'point' and args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
elif args.prompt_type == 'box' and args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
point, color = dataset.load_Semantic3D_sample(info['path'], args.sample_idx) |
|
args.voxel_size = info[configs.VOXEL[args.prompt_type]][args.prompt_idx] |
|
|
|
point_color = np.concatenate([point, color], axis=1) |
|
voxelizer = Voxelizer(voxel_size=args.voxel_size, clip_bound=None) |
|
|
|
labels_in = point[:, :1].astype(int) |
|
locs, feats, labels, inds_reconstruct = voxelizer.voxelize(point, color, labels_in) |
|
|
|
if args.prompt_type == 'point': |
|
if args.ret_prompt: return list(np.array(info['point_prompts'])[args.prompt_idx]) |
|
mask = seg_point(locs, feats, info['point_prompts'], args) |
|
point_prompts = np.array(info['point_prompts']) |
|
prompt_point = list(point_prompts[args.prompt_idx]) |
|
prompt_box = None |
|
PROMPT = prompt_point |
|
elif args.prompt_type == 'box': |
|
if args.ret_prompt: return list(np.array(info['box_prompts'])[args.prompt_idx]) |
|
mask = seg_box(locs, feats, info['box_prompts'], args) |
|
point_prompts = np.array(info['box_prompts']) |
|
prompt_point = None |
|
prompt_box = list(point_prompts[args.prompt_idx]) |
|
PROMPT = prompt_box |
|
elif args.prompt_type == 'mask': |
|
if 'mask_prompts' not in info: info['mask_prompts'] = info['point_prompts'] |
|
mask, prompt_mask = seg_mask(locs, feats, info['mask_prompts'], args) |
|
prompt_point, prompt_box = None, None |
|
point_locs = locs[inds_reconstruct] |
|
point_prompt_mask = prompt_mask[point_locs[:, 0], point_locs[:, 1], point_locs[:, 2]] |
|
point_prompt_mask = point_prompt_mask.unsqueeze(-1) |
|
point_prompt_mask_not = ~point_prompt_mask |
|
color_prompt_mask = color * point_prompt_mask_not.numpy() + (color * 0 + np.array([[1., 0., 0.]])) * point_prompt_mask.numpy() |
|
PROMPT = color_prompt_mask |
|
if args.ret_prompt: |
|
return color_prompt_mask |
|
|
|
point_locs = locs[inds_reconstruct] |
|
point_mask = mask[point_locs[:, 0], point_locs[:, 1], point_locs[:, 2]] |
|
|
|
point_mask = point_mask.unsqueeze(-1) |
|
point_mask_not = ~point_mask |
|
|
|
point, color = point_color[:, :3], point_color[:, 3:] |
|
new_color = color * point_mask_not.numpy() + (color * 0 + np.array([[0., 1., 0.]])) * point_mask.numpy() |
|
|
|
name_list = [args.dataset, "sample" + str(args.sample_idx), args.prompt_type + "-prompt" + str(args.prompt_idx)] |
|
name = '_'.join(name_list) + 'frames' |
|
|
|
|
|
name_list = [args.dataset, "sample" + str(args.sample_idx), args.prompt_type + "-prompt" + str(args.prompt_idx)] |
|
name = '_'.join(name_list) |
|
os.makedirs("cache_results", exist_ok=True) |
|
os.makedirs("cache_prompt", exist_ok=True) |
|
np.save("./cache_results/" + name + '.npy', new_color) |
|
np.save("./cache_prompt/" + name + '.npy', PROMPT) |
|
return new_color, PROMPT |
|
|
|
def create_box(prompt): |
|
x_min, y_min, z_min, x_max, y_max, z_max = tuple(prompt) |
|
bbox_points = np.array([ |
|
[x_min, y_min, z_min], |
|
[x_max, y_min, z_min], |
|
[x_max, y_max, z_min], |
|
[x_min, y_max, z_min], |
|
[x_min, y_min, z_max], |
|
[x_max, y_min, z_max], |
|
[x_max, y_max, z_max], |
|
[x_min, y_max, z_max] |
|
]) |
|
edges = [ |
|
(0, 1), (1, 2), (2, 3), (3, 0), |
|
(4, 5), (5, 6), (6, 7), (7, 4), |
|
(0, 4), (1, 5), (2, 6), (3, 7) |
|
] |
|
bbox_lines = [] |
|
f = 1 |
|
for start, end in edges: |
|
bbox_lines.append(go.Scatter3d( |
|
x=[bbox_points[start, 0], bbox_points[end, 0]], |
|
y=[bbox_points[start, 1], bbox_points[end, 1]], |
|
z=[bbox_points[start, 2], bbox_points[end, 2]], |
|
mode='lines', |
|
line=dict(color='rgb(220, 20, 60)', width=6), |
|
name="Box Prompt" if f == 1 else "", |
|
showlegend=True if f == 1 else False |
|
)) |
|
f = 0 |
|
return bbox_lines |