# EVA3D / app.py (Hugging Face Space demo)
import sys
import os
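# This Space assumes an EVA3D checkout already exists at ./EVA3D; its Python
# modules are added to the import path and its assets are copied into the
# working directory. The commented-out block below shows the original
# on-the-fly setup: cloning the repo, installing a pytorch3d wheel matched to
# the local torch/CUDA build, and downloading the pretrained models.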
# os.system("git clone https://github.com/hongfz16/EVA3D.git")
sys.path.append("EVA3D")
os.system("cp -r EVA3D/assets .")
# os.system(f"{sys.executable} -m pip install -U fvcore plotly")
# import torch
# pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
# version_str="".join([
# f"py3{sys.version_info.minor}_cu",
# torch.version.cuda.replace(".",""),
# f"_pyt{pyt_version_str}"
# ])
# os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")
# from download_models import download_pretrained_models
# download_pretrained_models()
import torch
import trimesh
import imageio
import pickle
import numpy as np
from munch import *
from PIL import Image
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils import data
from torchvision import utils
from torchvision import transforms
from skimage.measure import marching_cubes
from scipy.spatial import Delaunay
from scipy.spatial.transform import Rotation as R
from options import BaseOptions
from model import VoxelHumanGenerator as Generator
from dataset import DeepFashionDataset, DemoDataset
from utils import (
generate_camera_params,
align_volume,
extract_mesh_with_marching_cubes,
xyz2mesh,
requires_grad,
create_mesh_renderer,
create_cameras
)
from pytorch3d.io import load_objs_as_meshes, load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import (
FoVPerspectiveCameras, look_at_view_transform, look_at_rotation,
RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
)
# Fix random seeds so the demo produces repeatable samples.
import random

torch.random.manual_seed(8888)
random.seed(8888)

# Total yaw range (in radians) of the turntable video: the body's root
# orientation is rotated between -panning_angle / 2 and +panning_angle / 2
# around the sampled pose.
panning_angle = np.pi / 3
def sample_latent(opt, device):
    # Unused stub kept from the original script; latent codes are sampled
    # directly with torch.randn in setup() and get_mesh() instead.
    return
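# Render the sampled identity from three yaw offsets (+pi/8, 0, -pi/8) applied
# to the sampled body pose, and save the three views side by side as a single
# PNG under <results_dst_dir>/images_paper_fig/.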
def generate_rgb(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
requires_grad(g_ema, False)
g_ema.is_train = False
g_ema.train_renderer = False
img_list = []
for k in range(3):
if k == 0:
delta = R.from_rotvec(np.pi/8 * np.array([0, 1, 0]))
elif k == 2:
delta = R.from_rotvec(-np.pi/8 * np.array([0, 1, 0]))
else:
delta = R.from_rotvec(0 * np.array([0, 1, 0]))
r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
new_r = delta * r
new_sample_theta = sample_theta.clone()
new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
with torch.no_grad():
j = 0
chunk = 1
out = g_ema([sample_z[j:j+chunk]],
sample_cam_extrinsics[j:j+chunk],
sample_focals[j:j+chunk],
sample_beta[j:j+chunk],
new_sample_theta[j:j+chunk],
sample_trans[j:j+chunk],
truncation=opt.truncation_ratio,
truncation_latent=mean_latent,
return_eikonal=False,
return_normal=False,
return_mask=False,
fix_viewdir=True)
rgb_images_thumbs = out[1].detach().cpu()[..., :3].permute(0, 3, 1, 2)
g_ema.zero_grad()
img_list.append(rgb_images_thumbs)
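    # NOTE: this call assumes an older torchvision; newer releases rename
    # save_image's 'range' keyword to 'value_range'.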
utils.save_image(torch.cat(img_list, 0),
os.path.join(opt.results_dst_dir, 'images_paper_fig','{}.png'.format(str(0).zfill(7))),
nrow=3,
normalize=True,
range=(-1, 1),
padding=0,)
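# Extract a posed mesh from the generator's SDF with marching cubes
# (resolution 350, cube size 1.4), smooth it with Humphrey filtering, and
# return it as a trimesh.Trimesh.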
def generate_mesh(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
latent = g_ema.styles_and_noise_forward(sample_z[:1], None, opt.truncation_ratio,
mean_latent, False)
sdf = g_ema.renderer.marching_cube_posed(latent[0], sample_beta, sample_theta, resolution=350, size=1.4).detach()
marching_cubes_mesh, _, _ = extract_mesh_with_marching_cubes(sdf, level_set=0)
marching_cubes_mesh = trimesh.smoothing.filter_humphrey(marching_cubes_mesh, beta=0.2, iterations=5)
# marching_cubes_mesh_filename = os.path.join(opt.results_dst_dir,'marching_cubes_meshes_posed','sample_{}_marching_cubes_mesh.obj'.format(0))
# with open(marching_cubes_mesh_filename, 'w') as f:
# marching_cubes_mesh.export(f,file_type='obj')
return marching_cubes_mesh
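# Render a 120-frame turntable video (4 s at 30 fps): the yaw offset ramps from
# 0 up to +panning_angle/2, sweeps down to -panning_angle/2, then returns to 0,
# and the frames are written to <results_dst_dir>/images_paper_video/.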
def generate_video(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
video_list = []
for k in tqdm(range(120)):
if k < 30:
angle = (panning_angle / 2) * (k / 30)
elif k >= 30 and k < 90:
angle = panning_angle / 2 - panning_angle * ((k - 30) / 60)
else:
angle = -panning_angle / 2 * ((120 - k) / 30)
delta = R.from_rotvec(angle * np.array([0, 1, 0]))
r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
new_r = delta * r
new_sample_theta = sample_theta.clone()
new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
with torch.no_grad():
j = 0
chunk = 1
out = g_ema([sample_z[j:j+chunk]],
sample_cam_extrinsics[j:j+chunk],
sample_focals[j:j+chunk],
sample_beta[j:j+chunk],
new_sample_theta[j:j+chunk],
sample_trans[j:j+chunk],
truncation=opt.truncation_ratio,
truncation_latent=mean_latent,
return_eikonal=False,
return_normal=False,
return_mask=False,
fix_viewdir=True)
rgb_images_thumbs = out[1].detach().cpu()[..., :3]
g_ema.zero_grad()
video_list.append((rgb_images_thumbs.numpy() + 1) / 2. * 255. + 0.5)
all_img = np.concatenate(video_list, 0).astype(np.uint8)
imageio.mimwrite(os.path.join(opt.results_dst_dir, 'images_paper_video', 'video_{}.mp4'.format(str(0).zfill(7))), all_img, fps=30, quality=8)
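# Build the inference options for the 512x256 DeepFashion model, load the EMA
# generator from the checkpoint, compute the truncation mean latent, sample
# SMPL parameters and camera extrinsics from the dataset, and return everything
# the generate_* functions above need.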
def setup():
device='cuda' if torch.cuda.is_available() else 'cpu'
opt = BaseOptions().parse()
opt.training.batch = 1
opt.training.chunk = 1
opt.experiment.expname = '512x256_deepfashion'
opt.dataset.dataset_path = 'demodataset'
opt.rendering.depth = 5
opt.rendering.width = 128
opt.model.style_dim = 128
opt.model.renderer_spatial_output_dim = [512, 256]
opt.training.no_sphere_init = True
opt.rendering.input_ch_views = 3
opt.rendering.white_bg = True
opt.model.voxhuman_name = 'eva3d_deepfashion'
opt.training.deltasdf = True
opt.rendering.N_samples = 28
opt.experiment.ckpt = '420000'
opt.inference.identities = 1
opt.inference.truncation_ratio = 0.5
opt.model.is_test = True
opt.model.freeze_renderer = False
opt.rendering.no_features_output = True
opt.rendering.offset_sampling = True
opt.rendering.static_viewdirs = True
opt.rendering.force_background = True
opt.rendering.perturb = 0
opt.inference.size = opt.model.size
opt.inference.camera = opt.camera
opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
opt.inference.style_dim = opt.model.style_dim
opt.inference.project_noise = opt.model.project_noise
opt.inference.return_xyz = opt.rendering.return_xyz
checkpoints_dir = os.path.join('checkpoint', opt.experiment.expname, 'volume_renderer')
checkpoint_path = os.path.join(checkpoints_dir,
'models_{}.pt'.format(opt.experiment.ckpt.zfill(7)))
# define results directory name
result_model_dir = 'iter_{}'.format(opt.experiment.ckpt.zfill(7))
# create results directory
results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
if opt.inference.fixed_camera_angles:
opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
else:
opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_fig'), exist_ok=True)
os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_video'), exist_ok=True)
os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes_posed'), exist_ok=True)
checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
# load generation model
g_ema = Generator(opt.model, opt.rendering, full_pipeline=False, voxhuman_name=opt.model.voxhuman_name).to(device)
pretrained_weights_dict = checkpoint["g_ema"]
model_dict = g_ema.state_dict()
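    # Copy only the pretrained weights whose shapes match the current model and
    # print the names of any mismatched keys.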
for k, v in pretrained_weights_dict.items():
if v.size() == model_dict[k].size():
model_dict[k] = v
else:
print(k)
g_ema.load_state_dict(model_dict)
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)])
if 'deepfashion' in opt.dataset.dataset_path:
file_list = '/mnt/lustre/fzhong/smplify-x/deepfashion_train_list/deepfashion_train_list_MAN.txt'
elif '20w_fashion' in opt.dataset.dataset_path:
file_list = '/mnt/lustre/fzhong/mmhuman3d/20w_fashion_result/nondress_flist.txt'
else:
file_list = None
if file_list:
dataset = DeepFashionDataset(opt.dataset.dataset_path, transform, opt.model.size,
opt.model.renderer_spatial_output_dim, file_list)
else:
dataset = DemoDataset()
# get the mean latent vector for g_ema
if opt.inference.truncation_ratio < 1:
with torch.no_grad():
mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
else:
mean_latent = None
g_ema.renderer.is_train = False
g_ema.renderer.perturb = 0
# generate(opt.inference, dataset, g_ema, device, mean_latent, opt.rendering.render_video)
sample_trans, sample_beta, sample_theta = dataset.sample_smpl_param(1, device, val=False)
sample_cam_extrinsics, sample_focals = dataset.get_camera_extrinsics(1, device, val=False)
    # Sample an initial latent code for the first render.
    sample_z = torch.randn(1, opt.inference.style_dim, device=device)
    return opt.inference, g_ema, device, mean_latent, sample_z, \
        sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals
import gradio as gr
import plotly.graph_objects as go
from PIL import Image
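# Gradio callbacks. setup() is expensive (checkpoint loading, SMPL and camera
# sampling), so it runs lazily on the first button press and its outputs are
# cached in setup_list for subsequent requests.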
setup_list = None
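# Render the panning video for the currently cached identity and return the
# path of the written mp4 for the gr.Video component.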
def get_video():
global setup_list
if setup_list is None:
setup_list = list(setup())
generate_video(*setup_list)
torch.cuda.empty_cache()
path = 'evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_video/video_0000000.mp4'
return path
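# Resample the latent code (slot 4 of setup_list) so each click yields a new
# identity, render the three-view RGB figure and extract the posed mesh, and
# return a Plotly Mesh3d figure together with the saved RGB image.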
def get_mesh():
global setup_list
if setup_list is None:
setup_list = list(setup())
setup_list[4] = torch.randn(1, setup_list[0].style_dim, device=setup_list[2])
generate_rgb(*setup_list)
mesh = generate_mesh(*setup_list)
torch.cuda.empty_cache()
x=np.asarray(mesh.vertices).T[0]
y=np.asarray(mesh.vertices).T[1]
z=np.asarray(mesh.vertices).T[2]
i=np.asarray(mesh.faces).T[0]
j=np.asarray(mesh.faces).T[1]
k=np.asarray(mesh.faces).T[2]
fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
i=i, j=j, k=k,
color="lightpink",))
# # flatshading=True,
# lighting=dict(ambient=0.5,
# diffuse=1,
# fresnel=4,
# specular=0.5,
# roughness=0.05,
# facenormalsepsilon=0,
# vertexnormalsepsilon=0),
# lightposition=dict(x=100,
# y=100,
# z=1000)))
path='evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_fig/0000000.png'
image=Image.open(path)
return fig,image
markdown=f'''
# EVA3D: Compositional 3D Human Generation from 2D Image Collections
Authored by Fangzhou Hong, Zhaoxi Chen, Yushi Lan, Liang Pan, and Ziwei Liu.
This Space hosts the demo for the ICLR 2023 Spotlight paper "EVA3D: Compositional 3D Human Generation from 2D Image Collections".
### Useful links:
- [Official Github Repo](https://github.com/hongfz16/EVA3D)
- [Project Page](https://hongfz16.github.io/projects/EVA3D.html)
- [arXiv Link](https://arxiv.org/abs/2210.04888)
Licensed under the S-Lab License.
First, click "Generate RGB & Mesh" to randomly sample a 3D human (about 6 s). Then click "Generate Video" to render a panning video of the generated human (about 30 s).
'''
with gr.Blocks() as demo:
with gr.Row():
with gr.Column():
gr.Markdown(markdown)
with gr.Column():
with gr.Row():
with gr.Column():
image=gr.Image(type="pil",shape=(512,256*3))
with gr.Row():
with gr.Column():
mesh = gr.Plot()
with gr.Column():
video=gr.Video()
# with gr.Row():
# numberoframes = gr.Slider( minimum=30, maximum=250,label='Number Of Frame For Video Generation')
# model_name=gr.Dropdown(choices=["ffhq","afhq"],label="Choose Model Type")
# mesh_type=gr.Dropdown(choices=["DepthMesh","Marching Cubes"],label="Choose Mesh Type")
with gr.Row():
btn = gr.Button(value="Generate RGB & Mesh")
btn_2=gr.Button(value="Generate Video")
btn.click(get_mesh,[],[mesh,image])
btn_2.click(get_video,[],[video])
demo.launch(debug=True)