"""
Demo for template-free reconstruction
python demo.py model=ho-attn run.image_path=/BS/xxie-2/work/HDM/outputs/000000017450/k1.color.jpg run.job=sample model.predict_binary=True dataset.std_coverage=3.0
"""
import sys, os
import os.path as osp
from glob import glob

import hydra
import torch
import numpy as np
import imageio
from tqdm import tqdm
from torch.utils.data import DataLoader
from pytorch3d.datasets import collate_batched_meshes
from pytorch3d.structures import Pointclouds
from pytorch3d.renderer import PerspectiveCameras, look_at_view_transform
from pytorch3d.io import IO
import torchvision.transforms.functional as TVF
from huggingface_hub import hf_hub_download

sys.path.append(os.getcwd())  # make the local packages importable when run from the repo root
import training_utils
from configs.structured import ProjectConfig
from dataset.demo_dataset import DemoDataset
from model import CrossAttenHODiffusionModel, ConditionalPCDiffusionSeparateSegm
from render.pyt3d_wrapper import PcloudRenderer


class DemoRunner:
    def __init__(self, cfg: ProjectConfig):
        # Stage 1 jointly diffuses human + object points and predicts a binary
        # human/object label for each point
        cfg.model.model_name, cfg.model.predict_binary = 'pc2-diff-ho-sepsegm', True
        model_stage1 = ConditionalPCDiffusionSeparateSegm(**cfg.model)
        # Stage 2 refines the two clouds with cross attention; it does not predict segmentation
        cfg.model.model_name, cfg.model.predict_binary = 'diff-ho-attn', False
        model_stage2 = CrossAttenHODiffusionModel(**cfg.model)

        # Load checkpoints from the Hugging Face hub
        ckpt_file1 = hf_hub_download("xiexh20/HDM-models", f'{cfg.run.stage1_name}.pth')
        self.load_checkpoint(ckpt_file1, model_stage1)
        ckpt_file2 = hf_hub_download("xiexh20/HDM-models", f'{cfg.run.stage2_name}.pth')
        self.load_checkpoint(ckpt_file2, model_stage2)

        self.model_stage1, self.model_stage2 = model_stage1, model_stage2
        self.model_stage1.eval()
        self.model_stage2.eval()
        self.model_stage1.to('cuda')
        self.model_stage2.to('cuda')

        self.cfg = cfg
        self.io_pc = IO()

        # For visualization
        self.renderer = PcloudRenderer(image_size=cfg.dataset.image_size, radius=0.0075)
        self.rend_size = cfg.dataset.image_size
        self.device = 'cuda'

    def load_checkpoint(self, ckpt_file, model, device='cpu'):
        """Load a checkpoint file into the given model, tolerating DataParallel prefixes."""
        checkpoint = torch.load(ckpt_file, map_location=device)
        state_dict = checkpoint['model']
        if any(k.startswith('module.') for k in state_dict.keys()):
            # The checkpoint was saved from a (Distributed)DataParallel-wrapped model
            state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
            print('Removed "module." from checkpoint state dict')
        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
        print(f'Loaded model checkpoint from {ckpt_file}')
        if len(missing_keys):
            print(f' - Missing_keys: {missing_keys}')
        if len(unexpected_keys):
            print(f' - Unexpected_keys: {unexpected_keys}')

    def reload_checkpoint(self, cat_name):
        """Load checkpoints of the models fine-tuned on a specific category."""
        ckpt_file1 = hf_hub_download("xiexh20/HDM-models", f'{self.cfg.run.stage1_name}-{cat_name}.pth')
        self.load_checkpoint(ckpt_file1, self.model_stage1, device=self.device)
        ckpt_file2 = hf_hub_download("xiexh20/HDM-models", f'{self.cfg.run.stage2_name}-{cat_name}.pth')
        self.load_checkpoint(ckpt_file2, self.model_stage2, device=self.device)
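
    # Example (hypothetical category name; the valid names are determined by the
    # fine-tuned checkpoints released in the xiexh20/HDM-models hub repo):
    #   runner.reload_checkpoint('backpack')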

    @torch.no_grad()
    def run(self):
        """Run the demo on the given images and save the results."""
        # Set random seed
        training_utils.set_seed(self.cfg.run.seed)

        outdir = osp.join(self.cfg.run.code_dir_abs, 'outputs/demo')
        os.makedirs(outdir, exist_ok=True)
        cfg = self.cfg

        # Init data
        image_files = sorted(glob(cfg.run.image_path))
        data = DemoDataset(image_files,
                           (cfg.dataset.image_size, cfg.dataset.image_size),
                           cfg.dataset.std_coverage)
        dataloader = DataLoader(data, batch_size=cfg.dataloader.batch_size,
                                collate_fn=collate_batched_meshes,
                                num_workers=1, shuffle=False)
        progress_bar = tqdm(dataloader)

        for batch_idx, batch in enumerate(progress_bar):
            progress_bar.set_description(f'Processing batch {batch_idx:4d} / {len(dataloader):4d}')
            out_stage1, out_stage2 = self.forward_batch(batch, cfg)

            bs = len(out_stage1)
            camera_full = PerspectiveCameras(
                R=torch.stack(batch['R']),
                T=torch.stack(batch['T']),
                K=torch.stack(batch['K']),
                device='cuda',
                in_ndc=True)

            # Save output
            for i in range(bs):
                image_path = str(batch['image_path'][i])
                folder, fname = osp.basename(osp.dirname(image_path)), osp.splitext(osp.basename(image_path))[0]
                out_i = osp.join(outdir, folder)
                os.makedirs(out_i, exist_ok=True)
                self.io_pc.save_pointcloud(data=out_stage1[i],
                                           path=osp.join(out_i, f'{fname}_stage1.ply'))
                self.io_pc.save_pointcloud(data=out_stage2[i],
                                           path=osp.join(out_i, f'{fname}_stage2.ply'))
                TVF.to_pil_image(batch['images'][i]).save(osp.join(out_i, f'{fname}_input.png'))

                # Save metadata as well
                metadata = dict(index=i,
                                camera=camera_full[i],
                                image_size_hw=batch['image_size_hw'][i],
                                image_path=batch['image_path'][i])
                torch.save(metadata, osp.join(out_i, f'{fname}_meta.pth'))

                # Visualize: input image next to the stage 1 and stage 2 predictions
                pc_comb = Pointclouds([out_stage1[i].points_packed(), out_stage2[i].points_packed()],
                                      features=[out_stage1[i].features_packed(), out_stage2[i].features_packed()])
                video_file = osp.join(out_i, f'{fname}_360view.mp4')
                video_writer = imageio.get_writer(video_file, format='FFMPEG', mode='I', fps=1)

                # First render the front view
                rend_stage1, _ = self.renderer.render(out_stage1[i], camera_full[i], mode='mask')
                rend_stage2, _ = self.renderer.render(out_stage2[i], camera_full[i], mode='mask')
                comb = np.concatenate([batch['images'][i].permute(1, 2, 0).cpu().numpy(), rend_stage1, rend_stage2], 1)
                video_writer.append_data((comb * 255).astype(np.uint8))

                # Then rotate around the predictions in 30-degree steps
                for azim in range(180, 180 + 360, 30):
                    # Distance 1.7, elevation 0; the up vector is flipped to match
                    # the OpenCV-style y-down camera convention used by the data
                    R, T = look_at_view_transform(1.7, 0, azim, up=((0, -1, 0),))
                    # Batch of 2 cameras: pc_comb holds the stage 1 and stage 2 clouds
                    side_camera = PerspectiveCameras(image_size=((self.rend_size, self.rend_size),),
                                                     device=self.device,
                                                     R=R.repeat(2, 1, 1), T=T.repeat(2, 1),
                                                     focal_length=self.rend_size * 1.5,
                                                     principal_point=((self.rend_size / 2., self.rend_size / 2.),),
                                                     in_ndc=False)
                    rend, _ = self.renderer.render(pc_comb, side_camera, mode='mask')
                    imgs = [batch['images'][i].permute(1, 2, 0).cpu().numpy(), rend[0], rend[1]]
                    video_writer.append_data((np.concatenate(imgs, 1) * 255).astype(np.uint8))
                video_writer.close()  # finalize the mp4 file
                print(f"Visualization saved to {out_i}")

    @torch.no_grad()
    def forward_batch(self, batch, cfg):
        """
        Run the two-stage model on one batch.
        :param batch: a batch collated from DemoDataset
        :param cfg: the project config
        :return: predicted point clouds of stage 1 and stage 2
        """
        camera_full = PerspectiveCameras(
            R=torch.stack(batch['R']),
            T=torch.stack(batch['T']),
            K=torch.stack(batch['K']),
            device='cuda',
            in_ndc=True)

        # Stage 1: sample a combined human + object point cloud
        out_stage1 = self.model_stage1.forward_sample(num_points=cfg.dataset.max_points,
                                                      camera=camera_full,
                                                      image_rgb=torch.stack(batch['images']).to('cuda'),
                                                      mask=torch.stack(batch['masks']).to('cuda'),
                                                      scheduler=cfg.run.diffusion_scheduler,
                                                      num_inference_steps=cfg.run.num_inference_steps,
                                                      eta=cfg.model.ddim_eta)

        # Segment and normalize human/object
        bs = len(out_stage1)
        pred_hum, pred_obj = [], []  # predicted human/object points
        cent_hum_pred, cent_obj_pred = [], []
        radius_hum_pred, radius_obj_pred = [], []
        T_hum, T_obj = [], []
        num_samples = int(cfg.dataset.max_points / 2)
        for i in range(bs):
            pc: Pointclouds = out_stage1[i]
            # Stage 1 encodes the segmentation in the point colors: human is light
            # blue [0.1, 1.0, 1.0], object light green [0.5, 1.0, 0], so
            # thresholding the blue channel separates the two
            vc = pc.features_packed().cpu()  # (P, 3)
            points = pc.points_packed().cpu()  # (P, 3)
            mask_hum = vc[:, 2] > 0.5
            pc_hum, pc_obj = points[mask_hum], points[~mask_hum]

            # Up/down-sample each cloud to a fixed number of points
            pc_obj = self.upsample_predicted_pc(num_samples, pc_obj)
            pc_hum = self.upsample_predicted_pc(num_samples, pc_hum)

            # Normalize: center each cloud and divide by twice its maximum radius,
            # so all points fall inside a sphere of radius 0.5 around the origin
            cent_hum, cent_obj = torch.mean(pc_hum, 0, keepdim=True), torch.mean(pc_obj, 0, keepdim=True)
            scale_hum = torch.sqrt(torch.sum((pc_hum - cent_hum) ** 2, -1).max())
            scale_obj = torch.sqrt(torch.sum((pc_obj - cent_obj) ** 2, -1).max())
            pc_hum = (pc_hum - cent_hum) / (2 * scale_hum)
            pc_obj = (pc_obj - cent_obj) / (2 * scale_obj)

            # Update the camera translations accordingly for the separate human/object branches
            T_hum_scaled = (batch['T_ho'][i] + cent_hum.squeeze(0)) / (2 * scale_hum)
            T_obj_scaled = (batch['T_ho'][i] + cent_obj.squeeze(0)) / (2 * scale_obj)

            pred_hum.append(pc_hum)
            pred_obj.append(pc_obj)
            cent_hum_pred.append(cent_hum.squeeze(0))
            cent_obj_pred.append(cent_obj.squeeze(0))
            # Apply the OpenCV to PyTorch3D transform: flip x and y
            T_hum.append(T_hum_scaled * torch.tensor([-1, -1, 1]))
            T_obj.append(T_obj_scaled * torch.tensor([-1, -1, 1]))
            radius_hum_pred.append(scale_hum)
            radius_obj_pred.append(scale_obj)

        # Pack data into a new batch dict; the cameras must be human/object specific
        camera_hum = PerspectiveCameras(
            R=torch.stack(batch['R']),
            T=torch.stack(T_hum),
            K=torch.stack(batch['K_hum']),
            device='cuda',
            in_ndc=True)
        camera_obj = PerspectiveCameras(
            R=torch.stack(batch['R']),
            T=torch.stack(T_obj),
            K=torch.stack(batch['K_obj']),
            device='cuda',
            in_ndc=True)

        # Use the point clouds, centers and radii predicted by stage 1
        pc_hum = Pointclouds([x.to('cuda') for x in pred_hum])
        pc_obj = Pointclouds([x.to('cuda') for x in pred_obj])
        cent_hum = torch.stack(cent_hum_pred, 0).to('cuda')  # (B, 3)
        cent_obj = torch.stack(cent_obj_pred, 0).to('cuda')
        radius_hum = torch.stack(radius_hum_pred, 0).to('cuda')  # (B,)
        radius_obj = torch.stack(radius_obj_pred, 0).to('cuda')

        # Stage 2: refine both clouds with cross attention, starting the diffusion
        # from a partially noised version of the stage 1 prediction
        out_stage2: Pointclouds = self.model_stage2.forward_sample(
            num_points=num_samples,
            camera=camera_hum,
            image_rgb=torch.stack(batch['images_hum'], 0).to('cuda'),
            mask=torch.stack(batch['masks_hum'], 0).to('cuda'),
            gt_pc=pc_hum,
            rgb_obj=torch.stack(batch['images_obj'], 0).to('cuda'),
            mask_obj=torch.stack(batch['masks_obj'], 0).to('cuda'),
            pc_obj=pc_obj,
            camera_obj=camera_obj,
            cent_hum=cent_hum,
            cent_obj=cent_obj,
            radius_hum=radius_hum.unsqueeze(-1),  # (B, 1)
            radius_obj=radius_obj.unsqueeze(-1),
            sample_from_interm=True,
            noise_step=cfg.run.sample_noise_step,
            scheduler=cfg.run.diffusion_scheduler,
            num_inference_steps=cfg.run.num_inference_steps,
            eta=cfg.model.ddim_eta)

        return out_stage1, out_stage2

    def upsample_predicted_pc(self, num_samples, pc_obj):
        """
        Up/downsample the points to the given number.
        :param num_samples: the target number of points
        :param pc_obj: (N, 3)
        :return: (num_samples, 3)
        """
        if len(pc_obj) > num_samples:
            # Downsample: take a random subset (without replacement, to keep unique points)
            ind_obj = np.random.choice(len(pc_obj), num_samples, replace=False)
        else:
            # Upsample: keep all points and pad with randomly duplicated ones
            ind_obj = np.concatenate([np.arange(len(pc_obj)),
                                      np.random.choice(len(pc_obj), num_samples - len(pc_obj))])
        return pc_obj.clone()[torch.from_numpy(ind_obj).long().to(pc_obj.device)]
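
    # Example (hypothetical tensors, for illustration only):
    #   pts = torch.rand(1000, 3)
    #   runner.upsample_predicted_pc(500, pts).shape   # torch.Size([500, 3])  (random subset)
    #   runner.upsample_predicted_pc(2048, pts).shape  # torch.Size([2048, 3]) (padded with duplicates)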


@hydra.main(config_path='configs', config_name='configs', version_base='1.1')
def main(cfg: ProjectConfig):
    runner = DemoRunner(cfg)
    runner.run()


if __name__ == '__main__':
    main()
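

# Programmatic usage without the CLI (a sketch, assuming the same Hydra config
# tree that @hydra.main uses above):
#   from hydra import initialize, compose
#   with initialize(config_path='configs', version_base='1.1'):
#       cfg = compose(config_name='configs')
#       DemoRunner(cfg).run()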