import gradio as gr
import os
import subprocess
import shlex
import spaces
import torch

access_token = os.getenv("HUGGINGFACE_TOKEN")

# Install prebuilt CUDA extensions: pytorch3d (wheel matched to py3.10 /
# CUDA 12.1 / torch 2.4), nvdiffrast, and the renderutils plugin.
subprocess.run(
    shlex.split(
        "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html"
    )
)

subprocess.run(
    shlex.split(
        "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps"
    )
)

subprocess.run(
    shlex.split(
        "pip install ./extension/renderutils_plugin-1.0.0-py3-none-any.whl --force-reinstall --no-deps"
    )
)

def install_cuda_toolkit():
    # Download and silently install the CUDA 12.1 toolkit, then point the
    # environment at the new installation.
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])

    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
    )
    # Build kernels for Ampere GPUs (A100 / A10G).
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
    print("==> finish install")

install_cuda_toolkit()

@spaces.GPU
def check_gpu():
    # Make the freshly installed CUDA 12.1 toolkit visible inside the GPU worker.
    os.environ["CUDA_HOME"] = "/usr/local/cuda-12.1"
    os.environ["PATH"] += ":/usr/local/cuda-12.1/bin"
    os.environ["LD_LIBRARY_PATH"] = "/usr/local/cuda-12.1/lib64:" + os.environ.get("LD_LIBRARY_PATH", "")
    subprocess.run(["nvidia-smi"])
    print(f"torch.cuda.is_available: {torch.cuda.is_available()}")

check_gpu()

import numpy as np
import trimesh
import torchvision
import torch.nn.functional as F
from PIL import Image
from einops import rearrange
from omegaconf import OmegaConf
from torchvision import transforms
from torchvision.transforms import v2
from diffusers import (
    AutoencoderKL,
    AutoencoderTiny,
    DiffusionPipeline,
    FlowMatchEulerDiscreteScheduler,
    FluxPipeline,
)
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from pytorch_lightning import seed_everything
from huggingface_hub import hf_hub_download

from models.lrm.utils.camera_util import get_flux_input_cameras
from models.lrm.utils.infer_util import save_video
from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
from models.lrm.utils.render_utils import rotate_x, rotate_y
from models.lrm.utils.train_util import instantiate_from_config
from models.ISOMER.reconstruction_func import reconstruction
from models.ISOMER.projection_func import projection
from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames

device_0 = "cuda"
device_1 = "cuda"
resolution = 512
save_dir = "./outputs"

normal_transfer = NormalTransfer()
isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1)
isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1)
isomer_radius = 4.5
isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1)
isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1)
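
# NOTE: the Flux pipeline construction was lost from this section, but
# `flux_pipe` is used by generate_multi_view_images() below. The block
# below is a minimal, assumed reconstruction -- FLUX.1-dev in bfloat16 on
# device_0, with the live-preview helper bound as a method, as in similar
# Flux Spaces. Adjust the repo id / dtype to match the deployed checkpoint.
flux_pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
    token=access_token,
).to(device_0)
flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = (
    flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)
)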

# Load the PRM reconstruction model and its pretrained checkpoint.
config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
model_config = config.model_config
infer_config = config.infer_config

model = instantiate_from_config(model_config)
model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
# Keep only the LRM generator weights, stripping the 'lrm_generator.' prefix.
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
model.load_state_dict(state_dict, strict=True)
model = model.to(device_1)
torch.cuda.empty_cache()

@spaces.GPU
def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
    images = image.unsqueeze(0).to(device_1)
    images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)

    with torch.no_grad():
        # Encode the input views into triplane features.
        planes = model.forward_planes(images, input_cameras)

        mesh_path_idx = os.path.join(save_path, f'{name}.obj')

        mesh_out = model.extract_mesh(
            planes,
            use_texture_map=export_texmap,
            **infer_config,
        )
        if export_texmap:
            vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
            save_obj_with_mtl(
                vertices.data.cpu().numpy(),
                uvs.data.cpu().numpy(),
                faces.data.cpu().numpy(),
                mesh_tex_idx.data.cpu().numpy(),
                tex_map.permute(1, 2, 0).data.cpu().numpy(),
                mesh_path_idx,
            )
        else:
            vertices, faces, vertex_colors = mesh_out
            save_obj(vertices, faces, vertex_colors, mesh_path_idx)
        print(f"Mesh saved to {mesh_path_idx}")

        render_size = 512
        if if_save_video:
            video_path_idx = os.path.join(save_path, f'{name}.mp4')
            render_size = infer_config.render_resolution
            ENV = load_mipmap("models/lrm/env_mipmap/6")
            materials = (0.0, 0.9)

            # Orbit cameras for a 240-frame turntable video.
            all_mv, all_mvp, all_campos = get_render_cameras_video(
                batch_size=1,
                M=240,
                radius=4.5,
                elevation=(90, 60.0),
                is_flexicubes=True,
                fov=30
            )

            frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
                model,
                planes,
                render_cameras=all_mvp,
                camera_pos=all_campos,
                env=ENV,
                materials=materials,
                render_size=render_size,
                chunk_size=20,
                is_flexicubes=True,
            )
            # Map normals from [-1, 1] to [0, 1] and composite onto a white background.
            normals = (torch.nn.functional.normalize(normals) + 1) / 2
            normals = normals * alphas + (1 - alphas)
            all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)

            save_video(
                all_frames,
                video_path_idx,
                fps=30,
            )
            print(f"Video saved to {video_path_idx}")

    return vertices, faces

def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
    # Accept normals either in [0, 1] (remap to [-1, 1]) or already signed.
    if local_normal_images.min() >= 0:
        local_normal = local_normal_images.float() * 2 - 1
    else:
        local_normal = local_normal_images.float()
    global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False)
    # Flip x to match the renderer's coordinate convention, then map back to [0, 1].
    global_normal[..., 0] *= -1
    global_normal = (global_normal + 1) / 2
    global_normal = global_normal.permute(0, 3, 1, 2)
    return global_normal

@spaces.GPU(duration=120)
def generate_multi_view_images(prompt, seed):
    generator = torch.Generator().manual_seed(seed)
    with torch.no_grad():
        img = flux_pipe(
            prompt=prompt,
            num_inference_steps=5,
            guidance_scale=3.5,
            num_images_per_prompt=1,
            width=resolution * 2,
            height=resolution * 1,
            output_type='np',
            generator=generator,
        ).images
    return img

@spaces.GPU
def reconstruct_3d_model(images, prompt):
    global model
    model.init_flexicubes_geometry(device_1, fovy=50.0)
    model = model.eval()
    rgb_normal_grid = images
    save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_"))
    os.makedirs(save_dir_path, exist_ok=True)

    # Split the 2x4 grid into four RGB views (top row) and four normal maps (bottom row).
    images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()
    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)
    rgb_multi_view = images[:4, :3, :, :]
    normal_multi_view = images[4:, :3, :, :]
    multi_view_mask = get_background(normal_multi_view)
    # Composite the RGB views onto a white background.
    rgb_multi_view = rgb_multi_view * multi_view_mask + (1 - multi_view_mask)
    input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
    vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=True)

    global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
    global_normal = global_normal * multi_view_mask + (1 - multi_view_mask)

    global_normal = global_normal.permute(0, 2, 3, 1)
    rgb_multi_view = rgb_multi_view.permute(0, 2, 3, 1)
    multi_view_mask = multi_view_mask.permute(0, 2, 3, 1).squeeze(-1)
    vertices = torch.from_numpy(vertices).to(device_1)
    faces = torch.from_numpy(faces).to(device_1)
    # Rotate the LRM mesh into ISOMER's coordinate frame.
    vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
    vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]

    # ISOMER stage 2 refines the LRM mesh against the global normal maps
    # (stage 1 is skipped since the mesh is initialized from file).
    meshes = reconstruction(
        normal_pils=global_normal,
        masks=multi_view_mask,
        weights=isomer_geo_weights,
        fov=30,
        radius=isomer_radius,
        camera_angles_azi=isomer_azimuths,
        camera_angles_ele=isomer_elevations,
        expansion_weight_stage1=0.1,
        init_type="file",
        init_verts=vertices,
        init_faces=faces,
        stage1_steps=0,
        stage2_steps=50,
        start_edge_len_stage1=0.1,
        end_edge_len_stage1=0.02,
        start_edge_len_stage2=0.02,
        end_edge_len_stage2=0.005,
    )

    # Bake texture by projecting the RGB views onto the refined mesh;
    # returns the path of the exported GLB.
    save_glb_addr = projection(
        meshes,
        masks=multi_view_mask,
        images=rgb_multi_view,
        azimuths=isomer_azimuths,
        elevations=isomer_elevations,
        weights=isomer_color_weights,
        fov=30,
        radius=isomer_radius,
        save_dir=f"{save_dir_path}/ISOMER/",
    )

    return save_glb_addr

@spaces.GPU
def gradio_pipeline(prompt, seed):
    import ctypes

    # Preload libnvrtc so CUDA extensions can resolve it at runtime.
    cuda_lib_path = "/usr/local/cuda-12.1/lib64/libnvrtc.so.12"
    try:
        ctypes.CDLL(cuda_lib_path, mode=ctypes.RTLD_GLOBAL)
        print(f"Successfully preloaded {cuda_lib_path}")
    except OSError as e:
        print(f"Failed to preload {cuda_lib_path}: {e}")

    # The demo loads a precomputed RGB/normal grid instead of sampling Flux
    # at request time, so `seed` is currently unused.
    rgb_normal_grid = np.load("rgb_normal_grid.npy")
    image_preview = Image.fromarray((rgb_normal_grid[0] * 255).astype(np.uint8))

    save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt)

    return image_preview, save_glb_addr

with gr.Blocks() as demo:
    with gr.Row(variant="panel"):
        # Left column: prompt, seed, and generate button.
        with gr.Column():
            with gr.Row():
                prompt_input = gr.Textbox(
                    label="Enter Prompt",
                    placeholder="Describe your 3D model...",
                    lines=2,
                    elem_id="prompt_input"
                )

            with gr.Row():
                sample_seed = gr.Number(value=42, label="Seed Value", precision=0)

            with gr.Row():
                submit = gr.Button("Generate", elem_id="generate", variant="primary")

            with gr.Row(variant="panel"):
                gr.Markdown("Examples:")
                gr.Examples(
                    examples=[
                        ["a castle on a hill"],
                        ["an owl wearing a hat"],
                        ["a futuristic car"]
                    ],
                    inputs=[prompt_input],
                    label="Prompt Examples"
                )

        # Right column: grid preview and 3D model viewer.
        with gr.Column():
            with gr.Row():
                rgb_normal_grid_image = gr.Image(
                    label="RGB Normal Grid",
                    type="pil",
                    interactive=False
                )

            with gr.Row():
                with gr.Tab("GLB"):
                    output_glb_model = gr.Model3D(
                        label="Generated 3D Model (GLB Format)",
                        interactive=False
                    )
                    gr.Markdown("Download the model for proper visualization.")

    submit.click(
        fn=gradio_pipeline, inputs=[prompt_input, sample_seed],
        outputs=[rgb_normal_grid_image, output_glb_model]
    )

demo.launch()