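"""PromptHMR single-image demo.

Detects people with YOLO, regresses SMPL-X bodies with PromptHMR,
optionally aligns the scene to gravity with a SPEC camera-calibration
network, and shows the result in a viser web viewer.
"""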
import os
import sys
sys.path.insert(0, os.path.dirname(__file__) + '/..')
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import cv2
import tyro
import torch
import numpy as np
from glob import glob
from torch.amp import autocast
from ultralytics import YOLO
from data_config import SMPL_PATH, SMPLX_PATH
from prompt_hmr import load_model_from_folder
from prompt_hmr.smpl_family import SMPLX, SMPL
from prompt_hmr.utils.visualizer import draw_boxes, save_ply
from prompt_hmr.vis.traj import align_meshes_to_ground, align_meshes_to_gravity
from prompt_hmr.models.inference import prepare_batch, predict_masks
from prompt_hmr.vis.viser import viser_vis_human
from pipeline.camcalib.model import CameraRegressorNetwork
from segment_anything import SamPredictor, sam_model_registry
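
# Note: some of the imports above (e.g. SamPredictor, predict_masks, SMPL) are
# unused in this demo; SAM presumably backs the optional mask prompts.
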
def main(image='data/examples/example_1.jpg', gravity_align=False, detect_conf=0.3, render_overlap=True):
    savedir = os.path.basename(image)  # outputs go into a folder named after the image file
    os.makedirs(savedir, exist_ok=True)
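
    # Pretrained assets: the YOLOv8x detector and the PromptHMR weights are
    # expected under data/pretrain/, and SMPLX_PATH (from data_config) points
    # to the SMPL-X model files.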
    smplx = SMPLX(SMPLX_PATH).cuda()
    yolo = YOLO("data/pretrain/yolov8x.pt")
    phmr = load_model_from_folder('data/pretrain/phmr')

    # Prompt HMR: detect people and prompt the model with their bounding boxes
    # (the input dict also accepts 'text' and 'masks' prompts, unused here)
    img = cv2.imread(image)[:, :, ::-1]  # BGR -> RGB
    detection = yolo(image, verbose=False, conf=detect_conf, classes=0)
    boxes = detection[0].boxes.data.cpu()
    inputs = [{'image_cv': img, 'boxes': boxes, 'text': None, 'masks': None}]

    # Inference (no gradients; mixed precision via autocast)
    with torch.no_grad(), autocast('cuda'):
        batch = prepare_batch(inputs, img_size=896, interaction=False)
        output = phmr(batch, use_mean_hands=True)[0]

    # Collect the reconstruction and save it to disk
    keys = ['pose', 'betas', 'transl', 'rotmat', 'vertices', 'body_joints', 'cam_int']
    output = {k: output[k].detach().cpu() for k in keys}
    torch.save(output, f'{savedir}/output.pt')
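
    # The saved dict can be reloaded for downstream use; a sketch, assuming one
    # SMPL-X mesh per detected person:
    #   output = torch.load(f'{savedir}/output.pt')
    #   verts = output['vertices']  # (num_people, 10475, 3)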

    # Render the predicted meshes over the input image
    verts = output['vertices']
    focal = batch[0]['cam_int_original'][0, 0, 0]
    if render_overlap:
        from prompt_hmr.vis.renderer import Renderer
        renderer = Renderer(img.shape[1], img.shape[0], focal, bin_size=0)
        img_rend = renderer.render_meshes(verts, smplx.faces, img)
        cv2.imwrite(f'{savedir}/output.jpg', img_rend[:, :, ::-1])  # RGB -> BGR
        print(f'Rendered image saved to {os.path.abspath(savedir)}/output.jpg')

    # Align to gravity coordinates (optional)
    camera = np.eye(4)
    if gravity_align:
        # Use SPEC to estimate the camera pitch/roll, i.e. the gravity direction
        spec = CameraRegressorNetwork()
        spec = spec.load_ckpt('data/pretrain/camcalib_sa_biased_l2.ckpt').to('cuda')

        with torch.no_grad():
            preds = spec(img, transform_data=True)
            pred_vfov, pred_pitch, pred_roll = preds
            pred_f_pix = img.shape[0] / 2. / np.tan(pred_vfov / 2.)  # focal in pixels from vFoV (unused below)

        gravity_cam = spec.to_gravity_cam(pred_pitch, pred_roll)
        verts, [gv, gf, gc], R, T = align_meshes_to_gravity(
            verts,
            gravity_cam,
            floor_scale=2,
            floor_color=[[0.73, 0.78, 0.82],
                         [0.61, 0.69, 0.72]],
        )

        # Invert the alignment transform [R | T] to get the camera matrix
        # used for visualization
        cam_r = R.mT
        cam_t = -cam_r @ T
        camera = np.eye(4)
        camera[:3, :3] = cam_r
        camera[:3, 3] = cam_t
        floor = [gv.numpy(), gf.numpy()]
    else:
        # Without a gravity estimate, flip y and z (a 180-degree rotation
        # about x) so the camera-frame meshes appear upright in the viewer
        rot_180 = np.eye(3)
        rot_180[1, 1] = -1
        rot_180[2, 2] = -1
        verts = verts @ rot_180
        camera = np.eye(4)
        camera[:3, :3] = rot_180 @ camera[:3, :3]
        camera[:3, 3] = camera[:3, 3] @ rot_180
        floor = None

    # Webapp visualization
    viser_vis_human(verts, smplx.faces, cameras=[camera], floor=floor, image=img)


if __name__ == '__main__':
    tyro.cli(main)
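
# Example invocation (tyro exposes main()'s keyword arguments as CLI flags;
# the exact flag spelling follows tyro's conventions, e.g. booleans become
# --gravity-align / --no-gravity-align pairs):
#   python scripts/demo_phmr.py --image data/examples/example_1.jpg --gravity-align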