import sys
import os

# Clone the EVA3D repo and install runtime dependencies (fvcore, plotly, and a
# pytorch3d wheel matching the current Python / CUDA / PyTorch versions).
os.system("git clone https://github.com/hongfz16/EVA3D.git")
sys.path.append("EVA3D")
os.system("cp -r EVA3D/assets .")
os.system(f"{sys.executable} -m pip install -U fvcore plotly")

import torch

pyt_version_str = torch.__version__.split("+")[0].replace(".", "")
version_str = "".join([
    f"py3{sys.version_info.minor}_cu",
    torch.version.cuda.replace(".", ""),
    f"_pyt{pyt_version_str}"
])
os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")

import os
import html
import glob
import uuid
import hashlib
import requests
from tqdm import tqdm
from pdb import set_trace as st

from download_models import download_file

eva3d_deepfashion_model = dict(file_url='https://drive.google.com/uc?id=1SYPjxnHz3XPRhTarx_Lw8SG_iz16QUMU',
                               alt_url='',
                               file_size=160393221,
                               file_md5='d0fae86edf76c52e94223bd3f39b2157',
                               file_path='checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt',)
smpl_model = dict(file_url='https://drive.google.com/uc?id={}'.format(os.environ['smpl_link']),
                  alt_url='',
                  file_size=39001280,
                  file_md5='65dc7f162f3ef21a38637663c57e14a7',
                  file_path='smpl_models/smpl/SMPL_NEUTRAL.pkl',)

from huggingface_hub import hf_hub_download

def download_pretrained_models():
    print('Downloading EVA3D model pretrained on DeepFashion.')
    # with requests.Session() as session:
    #     try:
    #         download_file(session, eva3d_deepfashion_model)
    #     except:
    #         print('Google Drive download failed.\n' \
    #               'Trying to download from alternate server')
    #         download_file(session, eva3d_deepfashion_model, use_alt_url=True)
    eva3d_ckpt = hf_hub_download(repo_id="hongfz16/EVA3D", filename="models_0420000.pt", token=os.environ['hf_token'])
    os.system("mkdir -p checkpoint/512x256_deepfashion/volume_renderer")
    os.system("mkdir -p smpl_models/smpl")
    os.system(f"cp {eva3d_ckpt} checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt")

    print('Downloading SMPL model.')
    # with requests.Session() as session:
    #     try:
    #         download_file(session, smpl_model)
    #     except:
    #         print('Google Drive download failed.\n' \
    #               'Trying to download from alternate server')
    #         download_file(session, smpl_model, use_alt_url=True)
    smpl_pkl = hf_hub_download(repo_id="hongfz16/EVA3D", filename="SMPL_NEUTRAL.pkl", token=os.environ['hf_token'])
    os.system(f"cp {smpl_pkl} smpl_models/smpl/SMPL_NEUTRAL.pkl")

download_pretrained_models()

import os
import torch
import trimesh
import imageio
import pickle
import numpy as np
from munch import *
from PIL import Image
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils import data
from torchvision import utils
from torchvision import transforms
from skimage.measure import marching_cubes
from scipy.spatial import Delaunay
from scipy.spatial.transform import Rotation as R

from options import BaseOptions
from model import VoxelHumanGenerator as Generator
from dataset import DeepFashionDataset, DemoDataset
from utils import (
    generate_camera_params, align_volume, extract_mesh_with_marching_cubes,
    xyz2mesh, requires_grad, create_mesh_renderer, create_cameras
)
from pytorch3d.io import load_objs_as_meshes, load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import (
    FoVPerspectiveCameras, look_at_view_transform, look_at_rotation,
    RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
    SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
)

torch.random.manual_seed(8888)
import random
random.seed(8888)

panning_angle = np.pi / 3
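
# Generation helpers: generate_rgb renders the sampled human from the front and with
# +/- pi/8 yaw offsets applied to the SMPL global orientation; generate_mesh extracts
# a posed mesh from the generator's SDF via marching cubes; generate_video renders a
# 120-frame panning sequence covering +/- panning_angle / 2.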
def sample_latent(opt, device):
    # Placeholder; latent codes are sampled directly with torch.randn in setup() and get_mesh().
    return

def generate_rgb(opt, g_ema, device, mean_latent, sample_z, sample_trans,
                 sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    requires_grad(g_ema, False)
    g_ema.is_train = False
    g_ema.train_renderer = False
    img_list = []
    for k in range(3):
        # Three views: +pi/8 yaw, frontal, -pi/8 yaw.
        if k == 0:
            delta = R.from_rotvec(np.pi / 8 * np.array([0, 1, 0]))
        elif k == 2:
            delta = R.from_rotvec(-np.pi / 8 * np.array([0, 1, 0]))
        else:
            delta = R.from_rotvec(0 * np.array([0, 1, 0]))
        r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
        new_r = delta * r
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
        with torch.no_grad():
            j = 0
            chunk = 1
            out = g_ema([sample_z[j:j + chunk]],
                        sample_cam_extrinsics[j:j + chunk],
                        sample_focals[j:j + chunk],
                        sample_beta[j:j + chunk],
                        new_sample_theta[j:j + chunk],
                        sample_trans[j:j + chunk],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)
            rgb_images_thumbs = out[1].detach().cpu()[..., :3].permute(0, 3, 1, 2)
            g_ema.zero_grad()
        img_list.append(rgb_images_thumbs)
    utils.save_image(torch.cat(img_list, 0),
                     os.path.join(opt.results_dst_dir, 'images_paper_fig', '{}.png'.format(str(0).zfill(7))),
                     nrow=3,
                     normalize=True,
                     range=(-1, 1),
                     padding=0,)

def generate_mesh(opt, g_ema, device, mean_latent, sample_z, sample_trans,
                  sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    latent = g_ema.styles_and_noise_forward(sample_z[:1], None, opt.truncation_ratio, mean_latent, False)
    sdf = g_ema.renderer.marching_cube_posed(latent[0], sample_beta, sample_theta, resolution=350, size=1.4).detach()
    marching_cubes_mesh, _, _ = extract_mesh_with_marching_cubes(sdf, level_set=0)
    marching_cubes_mesh = trimesh.smoothing.filter_humphrey(marching_cubes_mesh, beta=0.2, iterations=5)
    # marching_cubes_mesh_filename = os.path.join(opt.results_dst_dir, 'marching_cubes_meshes_posed', 'sample_{}_marching_cubes_mesh.obj'.format(0))
    # with open(marching_cubes_mesh_filename, 'w') as f:
    #     marching_cubes_mesh.export(f, file_type='obj')
    return marching_cubes_mesh

def generate_video(opt, g_ema, device, mean_latent, sample_z, sample_trans,
                   sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    video_list = []
    for k in tqdm(range(120)):
        # Panning schedule: yaw goes 0 -> +panning_angle/2 -> -panning_angle/2 -> 0 over 120 frames.
        if k < 30:
            angle = (panning_angle / 2) * (k / 30)
        elif k >= 30 and k < 90:
            angle = panning_angle / 2 - panning_angle * ((k - 30) / 60)
        else:
            angle = -panning_angle / 2 * ((120 - k) / 30)
        delta = R.from_rotvec(angle * np.array([0, 1, 0]))
        r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
        new_r = delta * r
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
        with torch.no_grad():
            j = 0
            chunk = 1
            out = g_ema([sample_z[j:j + chunk]],
                        sample_cam_extrinsics[j:j + chunk],
                        sample_focals[j:j + chunk],
                        sample_beta[j:j + chunk],
                        new_sample_theta[j:j + chunk],
                        sample_trans[j:j + chunk],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)
            rgb_images_thumbs = out[1].detach().cpu()[..., :3]
            g_ema.zero_grad()
        video_list.append((rgb_images_thumbs.numpy() + 1) / 2. * 255. + 0.5)
    all_img = np.concatenate(video_list, 0).astype(np.uint8)
    imageio.mimwrite(os.path.join(opt.results_dst_dir, 'images_paper_video', 'video_{}.mp4'.format(str(0).zfill(7))),
                     all_img, fps=30, quality=8)

def setup():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    opt = BaseOptions().parse()
    opt.training.batch = 1
    opt.training.chunk = 1
    opt.experiment.expname = '512x256_deepfashion'
    opt.dataset.dataset_path = 'demodataset'
    opt.rendering.depth = 5
    opt.rendering.width = 128
    opt.model.style_dim = 128
    opt.model.renderer_spatial_output_dim = [512, 256]
    opt.training.no_sphere_init = True
    opt.rendering.input_ch_views = 3
    opt.rendering.white_bg = True
    opt.model.voxhuman_name = 'eva3d_deepfashion'
    opt.training.deltasdf = True
    opt.rendering.N_samples = 28
    opt.experiment.ckpt = '420000'
    opt.inference.identities = 1
    opt.inference.truncation_ratio = 0.6
    opt.model.is_test = True
    opt.model.freeze_renderer = False
    opt.rendering.no_features_output = True
    opt.rendering.offset_sampling = True
    opt.rendering.static_viewdirs = True
    opt.rendering.force_background = True
    opt.rendering.perturb = 0
    opt.inference.size = opt.model.size
    opt.inference.camera = opt.camera
    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
    opt.inference.style_dim = opt.model.style_dim
    opt.inference.project_noise = opt.model.project_noise
    opt.inference.return_xyz = opt.rendering.return_xyz

    checkpoints_dir = os.path.join('checkpoint', opt.experiment.expname, 'volume_renderer')
    checkpoint_path = os.path.join(checkpoints_dir, 'models_{}.pt'.format(opt.experiment.ckpt.zfill(7)))

    # define results directory name
    result_model_dir = 'iter_{}'.format(opt.experiment.ckpt.zfill(7))

    # create results directory
    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
    if opt.inference.fixed_camera_angles:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
    else:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_fig'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_video'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes_posed'), exist_ok=True)

    checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

    # load generation model
    g_ema = Generator(opt.model, opt.rendering, full_pipeline=False, voxhuman_name=opt.model.voxhuman_name).to(device)
    pretrained_weights_dict = checkpoint["g_ema"]
    model_dict = g_ema.state_dict()
    for k, v in pretrained_weights_dict.items():
        if v.size() == model_dict[k].size():
            model_dict[k] = v
        else:
            print(k)
    g_ema.load_state_dict(model_dict)

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)])
    if 'deepfashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/smplify-x/deepfashion_train_list/deepfashion_train_list_MAN.txt'
    elif '20w_fashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/mmhuman3d/20w_fashion_result/nondress_flist.txt'
    else:
        file_list = None
    if file_list:
        dataset = DeepFashionDataset(opt.dataset.dataset_path, transform, opt.model.size,
                                     opt.model.renderer_spatial_output_dim, file_list)
    else:
        dataset = DemoDataset()

    # get the mean latent vector for g_ema
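    # Truncation trick (as in StyleGAN-style generators): when truncation_ratio < 1,
    # sampled latents are pulled toward this mean latent at generation time.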
    if opt.inference.truncation_ratio < 1:
        with torch.no_grad():
            mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
    else:
        mean_latent = None

    g_ema.renderer.is_train = False
    g_ema.renderer.perturb = 0

    # generate(opt.inference, dataset, g_ema, device, mean_latent, opt.rendering.render_video)
    sample_trans, sample_beta, sample_theta = dataset.sample_smpl_param(1, device, val=False)
    sample_cam_extrinsics, sample_focals = dataset.get_camera_extrinsics(1, device, val=False)
    torch.randn(1, opt.inference.style_dim, device=device)
    return opt.inference, g_ema, device, mean_latent, torch.randn(1, opt.inference.style_dim, device=device), \
        sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals

import gradio as gr
import plotly.graph_objects as go
from PIL import Image

setup_list = None

def get_video():
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    generate_video(*setup_list)
    torch.cuda.empty_cache()
    path = 'evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_video/video_0000000.mp4'
    return path

def get_mesh():
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    # Resample the latent code so each click produces a new identity.
    setup_list[4] = torch.randn(1, setup_list[0].style_dim, device=setup_list[2])
    generate_rgb(*setup_list)
    mesh = generate_mesh(*setup_list)
    torch.cuda.empty_cache()
    x = np.asarray(mesh.vertices).T[0]
    y = np.asarray(mesh.vertices).T[1]
    z = np.asarray(mesh.vertices).T[2]
    i = np.asarray(mesh.faces).T[0]
    j = np.asarray(mesh.faces).T[1]
    k = np.asarray(mesh.faces).T[2]
    fig = go.Figure(go.Mesh3d(x=x, y=y, z=z, i=i, j=j, k=k,
                              color="lightpink",
                              # flatshading=True,
                              lighting=dict(ambient=0.5,
                                            diffuse=1,
                                            fresnel=4,
                                            specular=0.5,
                                            roughness=0.05,
                                            facenormalsepsilon=0,
                                            vertexnormalsepsilon=0),))
                              # lightposition=dict(x=100,
                              #                    y=100,
                              #                    z=1000)))
    path = 'evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_fig/0000000.png'
    image = Image.open(path)
    return fig, image

markdown = f'''
# EVA3D: Compositional 3D Human Generation from 2D Image Collections

Authored by Fangzhou Hong, Zhaoxi Chen, Yushi Lan, Liang Pan, Ziwei Liu

The Space demo for the ICLR 2023 Spotlight paper "EVA3D: Compositional 3D Human Generation from 2D Image Collections".

### Useful links:

- [Official Github Repo](https://github.com/hongfz16/EVA3D)
- [Project Page](https://hongfz16.github.io/projects/EVA3D.html)
- [arXiv Link](https://arxiv.org/abs/2210.04888)

Licensed under the S-Lab License.

First, use the "Generate RGB & Mesh" button to randomly sample a 3D human. Then, press the "Generate Video" button to render a panning video of the generated human.
'''

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(markdown)
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    image = gr.Image(type="pil", shape=(512, 256 * 3))
            with gr.Row():
                with gr.Column():
                    mesh = gr.Plot()
                with gr.Column():
                    video = gr.Video()
            # with gr.Row():
            #     numberoframes = gr.Slider(minimum=30, maximum=250, label='Number Of Frame For Video Generation')
            #     model_name = gr.Dropdown(choices=["ffhq", "afhq"], label="Choose Model Type")
            #     mesh_type = gr.Dropdown(choices=["DepthMesh", "Marching Cubes"], label="Choose Mesh Type")
            with gr.Row():
                btn = gr.Button(value="Generate RGB & Mesh")
                btn_2 = gr.Button(value="Generate Video")
            btn.click(get_mesh, [], [mesh, image])
            btn_2.click(get_video, [], [video])

demo.launch()