"""Gradio demo: text -> image (PeRFlow-T2I) -> 3D mesh (TripoSR).

Importing this module loads both models, so it is meant to be run as a
script rather than imported as a library.
"""
# import spaces
import os
import logging
import time
import argparse
import random
import tempfile
import shlex
import subprocess
from functools import partial

import rembg
import gradio as gr
import numpy as np
import torch
from PIL import Image

# subprocess.run(shlex.split('pip install wheel/torchmcubes-0.1.0-cp310-cp310-linux_x86_64.whl'))

from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, to_gradio_3d_orientation
from src.scheduler_perflow import PeRFlowScheduler
from diffusers import StableDiffusionPipeline, UNet2DConditionModel

logger = logging.getLogger(__name__)


def fill_background(img):
    """Composite an image with an alpha channel onto a mid-gray background.

    Args:
        img: Image (or array-like) with 8-bit channels whose 4th channel
            is alpha; the code indexes channel 3, so fewer channels fail.

    Returns:
        A 3-channel ``PIL.Image`` where transparent areas are filled with
        0.5 gray.
    """
    rgba = np.array(img).astype(np.float32) / 255.0
    alpha = rgba[:, :, 3:4]
    blended = rgba[:, :, :3] * alpha + (1 - alpha) * 0.5
    return Image.fromarray((blended * 255.0).astype(np.uint8))


def merge_delta_weights_into_unet(pipe, delta_weights, org_alpha=1.0):
    """Add delta weights onto the UNet weights of a pipeline, in place.

    Args:
        pipe: Pipeline object exposing a ``unet`` module.
        delta_weights: Mapping from state-dict key to delta tensor. Every
            key must exist in the UNet state dict.
        org_alpha: Scale applied to the original weight before the delta
            is added.

    Returns:
        The same ``pipe`` with the merged weights loaded into ``pipe.unet``.
    """
    unet_weights = pipe.unet.state_dict()
    for key, delta in delta_weights.items():
        original = unet_weights[key]
        try:
            merged = org_alpha * original.to(dtype=delta.dtype) + delta.to(device=original.device)
        except Exception:
            # Best effort: a delta that cannot be added is skipped, but the
            # skip is now reported, and KeyboardInterrupt/SystemExit are no
            # longer swallowed as they were by the former bare ``except``.
            logger.warning("Could not merge delta weight %r; keeping the original.", key, exc_info=True)
            # Same dtype round-trip as before so the numerics are unchanged.
            merged = original.to(dtype=delta.dtype)
        unet_weights[key] = merged.to(original.dtype)
    pipe.unet.load_state_dict(unet_weights, strict=True)
    return pipe


def setup_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Half precision is kept on GPU; on CPU fall back to float32, because
# float16 pipelines are not reliably runnable there.
t2i_dtype = torch.float16 if device != "cpu" else torch.float32

### TripoSR
model = TSR.from_pretrained(
    "stabilityai/TripoSR",
    config_name="config.yaml",
    weight_name="model.ckpt",
)
# adjust the chunk size to balance between speed and memory usage
model.renderer.set_chunk_size(8192)
model.to(device)

### PeRFlow-T2I
# pipe_t2i = StableDiffusionPipeline.from_pretrained("Lykon/dreamshaper-8", torch_dtype=torch.float16, safety_checker=None)
# pipe_t2i = StableDiffusionPipeline.from_pretrained("stablediffusionapi/disney-pixar-cartoon", torch_dtype=torch.float16, safety_checker=None)
# delta_weights = UNet2DConditionModel.from_pretrained("hansyan/piecewise-rectified-flow-delta-weights", torch_dtype=torch.float16, variant="v0-1",).state_dict()
# pipe_t2i = merge_delta_weights_into_unet(pipe_t2i, delta_weights)
pipe_t2i = StableDiffusionPipeline.from_pretrained("hansyan/perflow-sd15-disney", torch_dtype=t2i_dtype, safety_checker=None)
pipe_t2i.scheduler = PeRFlowScheduler.from_config(pipe_t2i.scheduler.config, prediction_type="epsilon", num_time_windows=4)
# Use the detected device instead of a hard-coded 'cuda:0' so the script
# does not crash on machines without CUDA.
pipe_t2i.to(device, t2i_dtype)

### gradio
rembg_session = rembg.new_session()


# @spaces.GPU
def generate(text, seed):
    """Generate one 512x512 image for ``text`` with the PeRFlow pipeline.

    Args:
        text: User prompt; a fixed quality prefix is prepended.
        seed: Random seed, as an int or a string that parses as an int.

    Returns:
        The generated image as a ``PIL.Image``.

    Raises:
        gr.Error: If ``seed`` cannot be converted to an integer.
    """
    try:
        seed_value = int(seed)
    except (TypeError, ValueError) as err:
        # Show a readable message in the UI instead of a raw traceback.
        raise gr.Error("Random seed must be an integer.") from err
    setup_seed(seed_value)

    prompt_prefix = "high quality, highly detailed, (best quality, masterpiece), "
    neg_prompt = "EasyNegative, drawn by bad-artist, sketch by bad-artist-anime, (bad_prompt:0.8), (artist name, signature, watermark:1.4), (ugly:1.2), (worst quality, poor details:1.4), bad-hands-5, badhandv4, blurry"
    text = prompt_prefix + text
    samples = pipe_t2i(
        prompt=[text],
        negative_prompt=[neg_prompt],
        height=512,
        width=512,
        # num_inference_steps = 6,
        # guidance_scale = 7.5,
        num_inference_steps=8,
        guidance_scale=7.5,
        output_type='pt',
    ).images
    # Drop the batch axis and move channels last before converting to 8-bit.
    samples = samples.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255.
    samples = samples.astype(np.uint8)
    return Image.fromarray(samples[:, :, :3])


# @spaces.GPU
def render(image, mc_resolution=256, formats=("obj",)):
    """Reconstruct a 3D mesh from an image and export it to temp files.

    Args:
        image: Array accepted by ``Image.fromarray``.
        mc_resolution: Resolution passed to ``model.extract_mesh``.
        formats: File extensions to export; one file is written per entry.
            Must contain at least one entry.

    Returns:
        Path of the file exported for the first entry of ``formats``.
    """
    image = Image.fromarray(image)
    image = image.resize((768, 768))
    image = remove_background(image, rembg_session)
    image = resize_foreground(image, 0.85)
    image = fill_background(image)
    scene_codes = model(image, device=device)
    mesh = model.extract_mesh(scene_codes, resolution=mc_resolution)[0]
    mesh = to_gradio_3d_orientation(mesh)

    exported_paths = []
    for mesh_format in formats:
        # Reserve a unique path, then close the handle immediately so no
        # file descriptor leaks; delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(suffix=f".{mesh_format}", delete=False) as handle:
            mesh_path = handle.name
        mesh.export(mesh_path)
        exported_paths.append(mesh_path)
    return exported_paths[0]


# layout
css = """
h1 {
    text-align: center;
    display:block;
}
h2 {
    text-align: center;
    display:block;
}
h3 {
    text-align: center;
    display:block;
}
"""

with gr.Blocks(title="TripoSR", css=css) as interface:
    gr.Markdown(
        """
        # Instant Text-to-3D Mesh Demo

        ### [PeRFlow](https://github.com/magic-research/piecewise-rectified-flow)-T2I + [TripoSR](https://github.com/VAST-AI-Research/TripoSR)

        Two-stage synthesis: 1) generating images by PeRFlow-T2I; 2) rendering 3D assests. Here, we plug the PeRFlow-delta-weights of SD-v1.5 into the Disney-Pixar-Cartoon dreambooth.
        """
    )

    with gr.Column():
        with gr.Row():
            output_image = gr.Image(label='Generated Image', height=384,)
            output_model_obj = gr.Model3D(
                label="Output 3D Model (OBJ Format)",
                interactive=False,
                height=384,
            )

        with gr.Row():
            textbox = gr.Textbox(label="Input Prompt", value="a husky dog")
            seed = gr.Textbox(label="Random Seed", value=42)

        gr.Markdown(
            """
            Images should be generated within 1 second normally, sometimes, it could a bit slow due to warm-up of the program.
            Here are some examples provided:
            - a policeman
            - a robot, close-up
            - a red car, side view
            - a blue mug
            - a burger
            - a tea pot
            - a wooden chair
            - a unicorn
            """
        )

    # activate: submitting either text box generates the image, and the
    # mesh is rendered only if generation succeeded.
    for trigger in (textbox, seed):
        trigger.submit(
            fn=generate,
            inputs=[textbox, seed],
            outputs=[output_image],
        ).success(
            fn=render,
            inputs=[output_image],
            outputs=[output_model_obj],
        )


if __name__ == '__main__':
    interface.queue(max_size=10)
    interface.launch()