perflow-triposr

Build error

App Files Files Community

perflow-triposr / app.py

cocktailpeanut

update

6defae8 9 months ago

raw

history blame contribute delete

6.58 kB

	#import spaces
	import os, logging, time, argparse, random, tempfile, rembg, shlex, subprocess
	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image
	from functools import partial

	#subprocess.run(shlex.split('pip install wheel/torchmcubes-0.1.0-cp310-cp310-linux_x86_64.whl'))

	from tsr.system import TSR
	from tsr.utils import remove_background, resize_foreground, to_gradio_3d_orientation

	from src.scheduler_perflow import PeRFlowScheduler
	from diffusers import StableDiffusionPipeline, UNet2DConditionModel

	def fill_background(img):
		"""Flatten an image with an alpha channel onto a mid-grey background.

		The input is converted to a float32 array and divided by 255. Channels
		0-2 are weighted by channel 3 (used as alpha) and the remaining weight
		is filled with the constant 0.5. The blend is rescaled by 255, cast to
		uint8 and returned as a PIL image.
		"""
		pixels = np.array(img)
		rgba = pixels.astype(np.float32) / 255.0
		colour, alpha = rgba[:, :, :3], rgba[:, :, 3:4]
		blended = colour * alpha + (1 - alpha) * 0.5
		return Image.fromarray((blended * 255.0).astype(np.uint8))

	def merge_delta_weights_into_unet(pipe, delta_weights, org_alpha = 1.0):
		"""Add delta weights onto the UNet of ``pipe`` and reload the result.

		For every key in ``delta_weights`` the matching UNet tensor is replaced
		by ``org_alpha * unet_weight + delta_weight``. The sum is computed in the
		delta's dtype on the UNet tensor's device and then cast back to the UNet
		tensor's original dtype. If that arithmetic raises an ``Exception``
		(NOTE(review): presumably a shape mismatch - confirm), the UNet tensor
		keeps its current value, round-tripped through the delta's dtype.

		Args:
			pipe: object exposing ``unet.state_dict()`` and
				``unet.load_state_dict()``.
			delta_weights: mapping from state-dict key to delta tensor.
			org_alpha: scale applied to the existing UNet weight before adding.

		Returns:
			The same ``pipe`` object, after the merged state dict has been
			loaded with ``strict=True``.

		Raises:
			KeyError: if a key of ``delta_weights`` is not in the UNet state dict.
		"""
		unet_weights = pipe.unet.state_dict()
		for key, delta in delta_weights.items():
			original = unet_weights[key]
			dtype = original.dtype
			try:
				merged = org_alpha * original.to(dtype=delta.dtype) + delta.to(device=original.device)
			except Exception:
				# Best-effort merge: skip tensors that cannot be combined.
				# ``Exception`` rather than a bare ``except`` so that
				# KeyboardInterrupt / SystemExit still propagate.
				merged = original.to(dtype=delta.dtype)
			unet_weights[key] = merged.to(dtype)
		pipe.unet.load_state_dict(unet_weights, strict=True)
		return pipe

	def setup_seed(seed):
		"""Seed every random number generator used by the app with ``seed``.

		Seeds, in order, Python's ``random``, NumPy, PyTorch's default
		generator and all CUDA generators, then turns on cuDNN's
		deterministic mode.
		"""
		seeders = (
			random.seed,
			np.random.seed,
			torch.manual_seed,
			torch.cuda.manual_seed_all,
		)
		for apply_seed in seeders:
			apply_seed(seed)
		torch.backends.cudnn.deterministic = True

	# Run on the first CUDA device when one is available, otherwise on the CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	### TripoSR
	model = TSR.from_pretrained(
		"stabilityai/TripoSR",
		config_name="config.yaml",
		weight_name="model.ckpt",
	)
	# adjust the chunk size to balance between speed and memory usage
	model.renderer.set_chunk_size(8192)
	model.to(device)


	### PeRFlow-T2I
	# pipe_t2i = StableDiffusionPipeline.from_pretrained("Lykon/dreamshaper-8", torch_dtype=torch.float16, safety_checker=None)
	# pipe_t2i = StableDiffusionPipeline.from_pretrained("stablediffusionapi/disney-pixar-cartoon", torch_dtype=torch.float16, safety_checker=None)
	# delta_weights = UNet2DConditionModel.from_pretrained("hansyan/piecewise-rectified-flow-delta-weights", torch_dtype=torch.float16, variant="v0-1",).state_dict()
	# pipe_t2i = merge_delta_weights_into_unet(pipe_t2i, delta_weights)

	# Use half precision only on the GPU; fall back to float32 on the CPU so the
	# pipeline follows the same device selection as the TripoSR model instead of
	# unconditionally requiring 'cuda:0'.
	pipe_dtype = torch.float16 if device.startswith("cuda") else torch.float32
	pipe_t2i = StableDiffusionPipeline.from_pretrained("hansyan/perflow-sd15-disney", torch_dtype=pipe_dtype, safety_checker=None)
	pipe_t2i.scheduler = PeRFlowScheduler.from_config(pipe_t2i.scheduler.config, prediction_type="epsilon", num_time_windows=4)
	pipe_t2i.to(device, pipe_dtype)


	### gradio
	# Single background-removal session, created once and passed to
	# remove_background() by render().
	rembg_session = rembg.new_session()

	#@spaces.GPU
	def generate(text, seed):
		"""Generate one 512x512 image for ``text`` with the PeRFlow T2I pipeline.

		Args:
			text: user prompt; a fixed quality prefix is prepended before sampling.
			seed: value convertible with ``int()``; passed to ``setup_seed`` so
				the same prompt and seed reuse the same RNG state.

		Returns:
			A PIL image built from the first three channels of the sample.

		Raises:
			ValueError: if ``seed`` cannot be converted with ``int()``.
		"""
		setup_seed(int(seed))
		prompt_prefix = "high quality, highly detailed, (best quality, masterpiece), "
		neg_prompt = "EasyNegative, drawn by bad-artist, sketch by bad-artist-anime, (bad_prompt:0.8), (artist name, signature, watermark:1.4), (ugly:1.2), (worst quality, poor details:1.4), bad-hands-5, badhandv4, blurry"
		text = prompt_prefix + text
		samples = pipe_t2i(
			prompt = [text],
			negative_prompt = [neg_prompt],
			height = 512,
			width = 512,
			# num_inference_steps = 6,
			# guidance_scale = 7.5,
			num_inference_steps = 8,
			guidance_scale = 7.5,
			output_type = 'pt',
		).images
		# Drop the batch axis and move channels last before converting to uint8.
		# NOTE(review): assumes `.images` is a (1, C, H, W) tensor in [0, 1] - confirm.
		samples = samples.squeeze(0).permute(1, 2, 0).cpu().numpy()*255.
		samples = samples.astype(np.uint8)
		return Image.fromarray(samples[:, :, :3])


	#@spaces.GPU
	def render(image, mc_resolution=256, formats=("obj",)):
		"""Reconstruct a 3D mesh from an image array and export it to temp files.

		The image is resized to 768x768, its background is removed, the
		foreground is resized with ratio 0.85 and composited by
		``fill_background`` before being fed to the TripoSR model.

		Args:
			image: array accepted by ``Image.fromarray``.
			mc_resolution: forwarded as ``resolution`` to ``model.extract_mesh``.
			formats: iterable of file extensions; one temporary file is
				exported per entry.

		Returns:
			Path of the file exported for the first entry of ``formats``.

		Raises:
			IndexError: if ``formats`` is empty.
		"""
		image = Image.fromarray(image)
		image = image.resize((768, 768))
		image = remove_background(image, rembg_session)
		image = resize_foreground(image, 0.85)
		image = fill_background(image)

		scene_codes = model(image, device=device)
		mesh = model.extract_mesh(scene_codes, resolution=mc_resolution)[0]
		mesh = to_gradio_3d_orientation(mesh)
		mesh_paths = []
		for extension in formats:
			# delete=False: the path is returned to the caller, so the file must
			# persist. Only the name is needed, so close the handle right away
			# instead of leaking it.
			with tempfile.NamedTemporaryFile(suffix=f".{extension}", delete=False) as handle:
				mesh_path = handle.name
			mesh.export(mesh_path)
			mesh_paths.append(mesh_path)
		return mesh_paths[0]


	# layout
	# Stylesheet handed to gr.Blocks: centres h1-h3 headings.
	css = """
	h1 {
	text-align: center;
	display:block;
	}
	h2 {
	text-align: center;
	display:block;
	}
	h3 {
	text-align: center;
	display:block;
	}
	"""
	# Page structure: header text, a row with the generated image next to the
	# 3D viewer, a row with the prompt and seed inputs, then example prompts.
	with gr.Blocks(title="TripoSR", css=css) as interface:
		gr.Markdown(
			"""
	# Instant Text-to-3D Mesh Demo

	### [PeRFlow](https://github.com/magic-research/piecewise-rectified-flow)-T2I + [TripoSR](https://github.com/VAST-AI-Research/TripoSR)

	Two-stage synthesis: 1) generating images by PeRFlow-T2I; 2) rendering 3D assests. Here, we plug the PeRFlow-delta-weights of SD-v1.5 into the Disney-Pixar-Cartoon dreambooth.
	"""
		)

		with gr.Column():
			with gr.Row():
				output_image = gr.Image(label='Generated Image', height=384,)

				output_model_obj = gr.Model3D(
					label="Output 3D Model (OBJ Format)",
					interactive=False,
					height=384,
				)

			with gr.Row():
				textbox = gr.Textbox(label="Input Prompt", value="a husky dog")
				seed = gr.Textbox(label="Random Seed", value=42)


		gr.Markdown(
			"""
	Images should be generated within 1 second normally, sometimes, it could a bit slow due to warm-up of the program. Here are some examples provided:
	- a policeman
	- a robot, close-up
	- a red car, side view
	- a blue mug
	- a burger
	- a tea pot
	- a wooden chair
	- a unicorn
	"""
		)

		# activate: submitting either the prompt or the seed box runs generate()
		# and, only if that succeeds, render() on the produced image.
		for trigger in (textbox, seed):
			trigger.submit(
				fn=generate,
				inputs=[textbox, seed],
				outputs=[output_image],
			).success(
				fn=render,
				inputs=[output_image],
				outputs=[output_model_obj],
			)



	if __name__ == '__main__':
		# Enable the request queue (max_size=10) and start the Gradio server.
		interface.queue(max_size=10).launch()