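# Gradio demo for MotionDiffuse: text-driven human motion generation with a diffusion model.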
import os
import sys
from os.path import join as pjoin

import numpy as np
import gradio as gr

os.makedirs("outputs", exist_ok=True)
sys.path.insert(0, '.')

from utils.get_opt import get_opt
from models import MotionTransformer
from trainers import DDPMTrainer
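# Run inference on CPU and load the sampling options of the pretrained text-to-motion checkpoint.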
device = 'cpu'
opt = get_opt("checkpoints/t2m/t2m_motiondiffuse/opt.txt", device)
opt.do_denoise = True
assert opt.dataset_name == "t2m"
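# HumanML3D dataset layout: 22 joints, 263-dimensional pose feature vectors.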
opt.data_root = './dataset/HumanML3D'
opt.motion_dir = pjoin(opt.data_root, 'new_joint_vecs')
opt.text_dir = pjoin(opt.data_root, 'texts')
opt.joints_num = 22
opt.dim_pose = 263
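# Mean/std statistics of the motion features, loaded from the checkpoint's meta directory.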
mean = np.load(pjoin(opt.meta_dir, 'mean.npy'))
std = np.load(pjoin(opt.meta_dir, 'std.npy'))
def build_models(opt):
    encoder = MotionTransformer(
        input_feats=opt.dim_pose,
        num_frames=opt.max_motion_length,
        num_layers=opt.num_layers,
        latent_dim=opt.latent_dim,
        no_clip=opt.no_clip,
        no_eff=opt.no_eff)
    return encoder
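# Build the motion transformer, wrap it in the DDPM trainer, and load the pretrained weights.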
encoder = build_models(opt).to(device)
trainer = DDPMTrainer(opt, encoder)
trainer.load(pjoin(opt.model_dir, opt.which_epoch + '.tar'))
trainer.eval_mode()
trainer.to(opt.device)
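# Gradio callback: sample a motion of the requested length for the prompt and render it to an mp4.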
def generate(prompt, length):
    from tools.visualization import process
    result_path = "outputs/" + str(hash(prompt)) + ".mp4"
    process(trainer, opt, device, mean, std, prompt, int(length), result_path)
    return result_path
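# Inputs: a text prompt and a frame-count slider (20-196, default 60); output: the rendered video.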
demo = gr.Interface(
    fn=generate,
    inputs=["text", gr.Slider(20, 196, value=60)],
    examples=[
        ["the man throws a punch with each hand.", 58],
        ["a person spins quickly and takes off running.", 29],
        ["a person quickly waves with their right hand", 46],
        ["a person performing a slight bow", 89],
    ],
    outputs="video",
    title="MotionDiffuse: Text-Driven Human Motion Generation with Diffusion Model",
    description="This is an interactive demo for MotionDiffuse. For more information, feel free to visit our project page (https://mingyuan-zhang.github.io/projects/MotionDiffuse.html).")
demo.launch(enable_queue=True)