Spaces:

LTT
/

Kiss3DGen

Running on Zero

App Files Files Community

LTT committed on Dec 17, 2024

Commit

72e5710

1 Parent(s): 84fce77

new

Browse files

Files changed (1) hide show

demo.py +325 -0

demo.py ADDED Viewed

	@@ -0,0 +1,325 @@

+import gradio as gr
+import os
+import subprocess
+import shlex
+import spaces
+import torch
+import numpy as numpy
+access_token = os.getenv("HUGGINGFACE_TOKEN")
+subprocess.run(
+    shlex.split(
+        "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt210/download.html"
+    )
+)
+subprocess.run(
+    shlex.split(
+        "pip install ./extension/nvdiffrast-0.3.1+torch-py3-none-any.whl --force-reinstall --no-deps"
+    )
+)
+subprocess.run(
+    shlex.split(
+        "pip install ./extension/renderutils_plugin-1.0-cp310-cp310-linux_x86_64.whl --force-reinstall --no-deps"
+    )
+)
+def install_cuda_toolkit():
+    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
+    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
+    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
+    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
+    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
+    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
+    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
+    os.environ["CUDA_HOME"] = "/usr/local/cuda"
+    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
+    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
+        os.environ["CUDA_HOME"],
+        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
+    )
+    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
+    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+    print("==> finfish install")
+# install_cuda_toolkit()
+@spaces.GPU
+def check_gpu():
+    os.environ['CUDA_HOME'] = '/usr/local/cuda-12.1'
+    os.environ['PATH'] += ':/usr/local/cuda-12.1/bin'
+    # os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-12.1/lib64'
+    os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda-12.1/lib64:" + os.environ.get('LD_LIBRARY_PATH', '')
+    subprocess.run(['nvidia-smi'])  # 测试 CUDA 是否可用
+    print(f"torch.cuda.is_available:{torch.cuda.is_available()}")
+check_gpu()
+from PIL import Image
+from einops import rearrange
+from diffusers import FluxPipeline
+from models.lrm.utils.camera_util import get_flux_input_cameras
+from models.lrm.utils.infer_util import save_video
+from models.lrm.utils.mesh_util import save_obj, save_obj_with_mtl
+from models.lrm.utils.render_utils import rotate_x, rotate_y
+from models.lrm.utils.train_util import instantiate_from_config
+from models.ISOMER.reconstruction_func import reconstruction
+from models.ISOMER.projection_func import projection
+import os
+from einops import rearrange
+from omegaconf import OmegaConf
+import torch
+import numpy as np
+import trimesh
+import torchvision
+import torch.nn.functional as F
+from PIL import Image
+from torchvision import transforms
+from torchvision.transforms import v2
+from diffusers import  DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
+from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
+from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
+from diffusers import FluxPipeline
+from pytorch_lightning import seed_everything
+import os
+from huggingface_hub import hf_hub_download
+from utils.tool import NormalTransfer, get_background, get_render_cameras_video, load_mipmap, render_frames
+device_0 = "cuda:0"
+device_1 = "cuda:1"
+resolution = 512
+save_dir = "./outputs"
+normal_transfer = NormalTransfer()
+isomer_azimuths = torch.from_numpy(np.array([0, 90, 180, 270])).float().to(device_1)
+isomer_elevations = torch.from_numpy(np.array([5, 5, 5, 5])).float().to(device_1)
+isomer_radius = 4.5
+isomer_geo_weights = torch.from_numpy(np.array([1, 0.9, 1, 0.9])).float().to(device_1)
+isomer_color_weights = torch.from_numpy(np.array([1, 0.5, 1, 0.5])).float().to(device_1)
+# model initialization and loading
+# flux
+# Two VAEs are loaded in bfloat16 on device_0: `taef1` is installed as the
+# pipeline's `vae`, while `good_vae` is handed to the generation call as the
+# `good_vae=` argument (see generate_multi_view_images).
+taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=torch.bfloat16).to(device_0)
+good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, token=access_token).to(device_0)
+# flux_pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=access_token).to(device=device_0, dtype=torch.bfloat16)
+flux_pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, vae=taef1, token=access_token).to(device_0)
+# Download the LoRA checkpoint from the Hub and load it into the pipeline.
+flux_lora_ckpt_path = hf_hub_download(repo_id="LTT/xxx-ckpt", filename="rgb_normal_large.safetensors", repo_type="model")
+flux_pipe.load_lora_weights(flux_lora_ckpt_path)
+# flux_pipe.to(device=device_0, dtype=torch.bfloat16)
+torch.cuda.empty_cache()
+# Bind the imported helper function to this pipeline instance so it can be
+# called as a method (`flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images(...)`).
+flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(flux_pipe)
+# lrm
+# Build the LRM model from the `model_config` section of the YAML config;
+# `infer_config` is later unpacked into model.extract_mesh().
+config = OmegaConf.load("./models/lrm/config/PRM_inference.yaml")
+model_config = config.model_config
+infer_config = config.infer_config
+model = instantiate_from_config(model_config)
+model_ckpt_path = hf_hub_download(repo_id="LTT/PRM", filename="final_ckpt.ckpt", repo_type="model")
+# NOTE(review): torch.load unpickles the checkpoint file -- only load checkpoints from a trusted repo.
+state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
+# Keep only the 'lrm_generator.' entries and strip that prefix from the key
+# (14 == len('lrm_generator.')) so the keys match the bare model.
+state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.')}
+model.load_state_dict(state_dict, strict=True)
+model = model.to(device_1)
+torch.cuda.empty_cache()
+@spaces.GPU
+def lrm_reconstructions(image, input_cameras, save_path=None, name="temp", export_texmap=False, if_save_video=False):
+    images = image.unsqueeze(0).to(device_1)
+    images = v2.functional.resize(images, 512, interpolation=3, antialias=True).clamp(0, 1)
+    # breakpoint()
+    with torch.no_grad():
+        # get triplane
+        planes = model.forward_planes(images, input_cameras)
+        mesh_path_idx = os.path.join(save_path, f'{name}.obj')
+        mesh_out = model.extract_mesh(
+            planes,
+            use_texture_map=export_texmap,
+            **infer_config,
+        )
+        if export_texmap:
+            vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
+            save_obj_with_mtl(
+                vertices.data.cpu().numpy(),
+                uvs.data.cpu().numpy(),
+                faces.data.cpu().numpy(),
+                mesh_tex_idx.data.cpu().numpy(),
+                tex_map.permute(1, 2, 0).data.cpu().numpy(),
+                mesh_path_idx,
+            )
+        else:
+            vertices, faces, vertex_colors = mesh_out
+            save_obj(vertices, faces, vertex_colors, mesh_path_idx)
+        print(f"Mesh saved to {mesh_path_idx}")
+        render_size = 512
+        if if_save_video:
+            video_path_idx = os.path.join(save_path, f'{name}.mp4')
+            render_size = infer_config.render_resolution
+            ENV = load_mipmap("models/lrm/env_mipmap/6")
+            materials = (0.0,0.9)
+            all_mv, all_mvp, all_campos = get_render_cameras_video(
+                batch_size=1,
+                M=240,
+                radius=4.5,
+                elevation=(90, 60.0),
+                is_flexicubes=True,
+                fov=30
+            )
+            frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals, alphas = render_frames(
+                model,
+                planes,
+                render_cameras=all_mvp,
+                camera_pos=all_campos,
+                env=ENV,
+                materials=materials,
+                render_size=render_size,
+                chunk_size=20,
+                is_flexicubes=True,
+            )
+            normals = (torch.nn.functional.normalize(normals) + 1) / 2
+            normals = normals * alphas + (1-alphas)
+            all_frames = torch.cat([frames, albedos, pbr_spec_lights, pbr_diffuse_lights, normals], dim=3)
+            save_video(
+                all_frames,
+                video_path_idx,
+                fps=30,
+            )
+            print(f"Video saved to {video_path_idx}")
+    return vertices, faces
+def local_normal_global_transform(local_normal_images, azimuths_deg, elevations_deg):
+    if local_normal_images.min() >= 0:
+        local_normal = local_normal_images.float() * 2 - 1
+    else:
+        local_normal = local_normal_images.float()
+    global_normal = normal_transfer.trans_local_2_global(local_normal, azimuths_deg, elevations_deg, radius=4.5, for_lotus=False)
+    global_normal[...,0] *= -1
+    global_normal = (global_normal + 1) / 2
+    global_normal = global_normal.permute(0, 3, 1, 2)
+    return global_normal
+# 生成多视图图像
+@spaces.GPU(duration=120)
+def generate_multi_view_images(prompt, seed):
+    # torch.cuda.empty_cache()
+    # generator = torch.manual_seed(seed)
+    generator = torch.Generator().manual_seed(seed)
+    with torch.no_grad():
+        # images = flux_pipe(
+        #     prompt=prompt,
+        #     num_inference_steps=10,
+        #     guidance_scale=3.5,
+        #     num_images_per_prompt=1,
+        #     width=resolution * 4,
+        #     height=resolution * 2,
+        #     output_type='np',
+        #     generator=generator,
+        #     good_vae=good_vae,
+        # ).images
+        for img in flux_pipe.flux_pipe_call_that_returns_an_iterable_of_images(
+            prompt=prompt,
+            guidance_scale=3.5,
+            num_inference_steps=10,
+            width=resolution * 4,
+            height=resolution * 2,
+            generator=generator,
+            output_type="np",
+            good_vae=good_vae,
+        ):
+            pass
+    # 返回最终的图像和种子（通过外部调用处理）
+    return img
+# 重建 3D 模型
+@spaces.GPU
+def reconstruct_3d_model(images, prompt):
+    global model
+    model.init_flexicubes_geometry(device_1, fovy=50.0)
+    model = model.eval()
+    rgb_normal_grid = images
+    save_dir_path = os.path.join(save_dir, prompt.replace(" ", "_"))
+    os.makedirs(save_dir_path, exist_ok=True)
+    images = torch.from_numpy(rgb_normal_grid).squeeze(0).permute(2, 0, 1).contiguous().float()     # (3, 1024, 2048)
+    images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=2, m=4)        # (8, 3, 512, 512)
+    rgb_multi_view = images[:4, :3, :, :]
+    normal_multi_view = images[4:, :3, :, :]
+    multi_view_mask = get_background(normal_multi_view)
+    rgb_multi_view = rgb_multi_view * rgb_multi_view + (1-multi_view_mask)
+    input_cameras = get_flux_input_cameras(batch_size=1, radius=4.2, fov=30).to(device_1)
+    vertices, faces = lrm_reconstructions(rgb_multi_view, input_cameras, save_path=save_dir_path, name='lrm', export_texmap=False, if_save_video=False)
+    # local normal to global normal
+    global_normal = local_normal_global_transform(normal_multi_view.permute(0, 2, 3, 1), isomer_azimuths, isomer_elevations)
+    global_normal = global_normal * multi_view_mask + (1-multi_view_mask)
+    global_normal = global_normal.permute(0,2,3,1)
+    rgb_multi_view = rgb_multi_view.permute(0,2,3,1)
+    multi_view_mask = multi_view_mask.permute(0,2,3,1).squeeze(-1)
+    vertices = torch.from_numpy(vertices).to(device_1)
+    faces = torch.from_numpy(faces).to(device_1)
+    vertices = vertices @ rotate_x(np.pi / 2, device=vertices.device)[:3, :3]
+    vertices = vertices @ rotate_y(np.pi / 2, device=vertices.device)[:3, :3]
+    # global_normal: B,H,W,3
+    # multi_view_mask: B,H,W
+    # rgb_multi_view: B,H,W,3
+    meshes = reconstruction(
+        normal_pils=global_normal,
+        masks=multi_view_mask,
+        weights=isomer_geo_weights,
+        fov=30,
+        radius=isomer_radius,
+        camera_angles_azi=isomer_azimuths,
+        camera_angles_ele=isomer_elevations,
+        expansion_weight_stage1=0.1,
+        init_type="file",
+        init_verts=vertices,
+        init_faces=faces,
+        stage1_steps=0,
+        stage2_steps=50,
+        start_edge_len_stage1=0.1,
+        end_edge_len_stage1=0.02,
+        start_edge_len_stage2=0.02,
+        end_edge_len_stage2=0.005,
+    )
+    save_glb_addr = projection(
+        meshes,
+        masks=multi_view_mask,
+        images=rgb_multi_view,
+        azimuths=isomer_azimuths,
+        elevations=isomer_elevations,
+        weights=isomer_color_weights,
+        fov=30,
+        radius=isomer_radius,
+        save_dir=f"{save_dir_path}/ISOMER/",
+    )
+    return save_glb_addr
+# Gradio 接口函数
+@spaces.GPU
+def gradio_pipeline(prompt, seed):
+    # 生成多视图图像
+    rgb_normal_grid = generate_multi_view_images(prompt, seed)
+    image_preview = Image.fromarray((rgb_normal_grid * 255).astype(np.uint8))
+    # 3d reconstruction
+    # 重建 3D 模型并返回 glb 路径
+    save_glb_addr = reconstruct_3d_model(rgb_normal_grid, prompt)
+    return image_preview, save_glb_addr
+if __name__ == "__main__":
+    prompt_input = "a owm"
+    sample_seed = 42
+    gradio_pipeline(prompt_input, sample_seed)