|
import os |
|
import sys |
|
sys.path.insert(0, os.path.dirname(__file__) + '/..') |
|
os.environ["CUDA_VISIBLE_DEVICES"]="0" |
|
import cv2 |
|
import tyro |
|
import torch |
|
import numpy as np |
|
from glob import glob |
|
from torch.amp import autocast |
|
|
|
from ultralytics import YOLO |
|
from data_config import SMPL_PATH, SMPLX_PATH |
|
from prompt_hmr import load_model_from_folder |
|
from prompt_hmr.smpl_family import SMPLX, SMPL |
|
from prompt_hmr.utils.visualizer import draw_boxes, save_ply |
|
|
|
from prompt_hmr.vis.traj import align_meshes_to_ground, align_meshes_to_gravity |
|
from prompt_hmr.models.inference import prepare_batch, predict_masks |
|
from prompt_hmr.vis.viser import viser_vis_human |
|
|
|
from pipeline.camcalib.model import CameraRegressorNetwork |
|
from segment_anything import SamPredictor, sam_model_registry |
|
|
|
|
|
def main(image='data/examples/example_1.jpg', gravity_align=False, detect_conf=0.3, render_overlap=True):
    """Run PromptHMR on a single image and visualize the result.

    Pipeline: detect people with YOLO, regress SMPL-X bodies with PromptHMR,
    optionally render a mesh overlay, optionally align the scene to gravity
    using a SPEC camera-calibration network, then open a viser viewer.

    Args:
        image: Path to the input image.
        gravity_align: If True, estimate camera pitch/roll and rotate the
            meshes into a gravity-aligned world frame (adds a floor plane).
        detect_conf: YOLO person-detection confidence threshold.
        render_overlap: If True, render the meshes over the input image and
            save the overlay next to the raw outputs.
    """
    # Outputs go into a directory named after the image file (extension
    # included, e.g. "example_1.jpg/") — preserved original behavior.
    savedir = os.path.basename(image)
    os.makedirs(savedir, exist_ok=True)

    smplx = SMPLX(SMPLX_PATH).cuda()
    yolo = YOLO("data/pretrain/yolov8x.pt")
    phmr = load_model_from_folder('data/pretrain/phmr')

    # BGR -> RGB; the downstream model and renderer expect RGB.
    img = cv2.imread(image)[:, :, ::-1]
    detection = yolo(image, verbose=False, conf=detect_conf, classes=0)  # class 0 = person
    boxes = detection[0].boxes.data.cpu()
    inputs = [{'image_cv': img, 'boxes': boxes, 'text': None, 'masks': None}]

    # BUG FIX: the original `with torch.no_grad() and autocast('cuda'):`
    # only entered the autocast context (`A and B` evaluates to B), so
    # gradients were still being tracked during inference. Enter both
    # context managers properly.
    with torch.no_grad(), autocast('cuda'):
        batch = prepare_batch(inputs, img_size=896, interaction=False)
        output = phmr(batch, use_mean_hands=True)[0]

    # Keep only the serializable prediction tensors, detached and on CPU.
    keys = ['pose', 'betas', 'transl', 'rotmat', 'vertices', 'body_joints', 'cam_int']
    output = {k: output[k].detach().cpu() for k in keys}
    torch.save(output, f'{savedir}/output.pt')

    verts = output['vertices']
    focal = batch[0]['cam_int_original'][0, 0, 0]
    if render_overlap:
        from prompt_hmr.vis.renderer import Renderer
        renderer = Renderer(img.shape[1], img.shape[0], focal, bin_size=0)
        img_rend = renderer.render_meshes(verts, smplx.faces, img)
        cv2.imwrite(f'{savedir}/output.jpg', img_rend[:, :, ::-1])  # RGB -> BGR for OpenCV
        print(f'Rendered image saved to {os.path.abspath(savedir)}/output.jpg')

    if gravity_align:
        # Estimate camera pitch/roll with the SPEC camera-calibration network
        # and rotate the meshes into a gravity-aligned world with a floor.
        spec = CameraRegressorNetwork()
        spec = spec.load_ckpt('data/pretrain/camcalib_sa_biased_l2.ckpt').to('cuda')
        with torch.no_grad():
            preds = spec(img, transform_data=True)
        # pred_vfov is unused below (the original also derived an unused
        # focal length from it); only pitch/roll feed the gravity rotation.
        pred_vfov, pred_pitch, pred_roll = preds
        gravity_cam = spec.to_gravity_cam(pred_pitch, pred_roll)

        verts, [gv, gf, gc], R, T = align_meshes_to_gravity(
            verts,
            gravity_cam,
            floor_scale=2,
            floor_color=[[0.73, 0.78, 0.82],
                         [0.61, 0.69, 0.72]]
        )
        # World->camera extrinsics: invert the rigid transform (R, T).
        cam_r = R.mT
        cam_t = - cam_r @ T
        camera = np.eye(4)
        camera[:3, :3] = cam_r
        camera[:3, 3] = cam_t
        floor = [gv.numpy(), gf.numpy()]
    else:
        # No gravity estimate: rotate 180 deg about X (flip Y and Z) so the
        # OpenCV camera convention maps to the viewer's Y-up world.
        rot_180 = np.eye(3)
        rot_180[1, 1] = -1
        rot_180[2, 2] = -1
        verts = verts @ rot_180

        camera = np.eye(4)
        camera[:3, :3] = rot_180 @ camera[:3, :3]
        camera[:3, 3] = camera[:3, 3] @ rot_180
        floor = None

    viser_vis_human(verts, smplx.faces, cameras=[camera], floor=floor, image=img)
|
|
|
|
|
# CLI entry point: tyro turns main()'s keyword arguments into command-line flags.
if __name__ == '__main__':

    tyro.cli(main)