Spaces:

wl-zhao
/

unipc_sdm

Runtime error

unipc_sdm / app.py

zwl

fix bug

4d1940c about 1 year ago

10.9 kB

	from diffusers import AutoencoderKL, UNet2DConditionModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, UniPCMultistepScheduler
	from diffusers import StableDiffusionXLPipeline
	from diffusers import StableDiffusionXLImg2ImgPipeline
	import gradio as gr
	import torch
	from PIL import Image
	import os

	# Single UniPC multistep scheduler instance; it is handed to every pipeline
	# constructed further down in this file.
	scheduler = UniPCMultistepScheduler(
	    **{
	        # Noise schedule.
	        "num_train_timesteps": 1000,
	        "beta_schedule": "scaled_linear",
	        "beta_start": 0.00085,
	        "beta_end": 0.012,
	        "trained_betas": None,
	        # Solver behaviour.
	        "solver_type": "bh2",
	        "predict_x0": True,
	        "lower_order_final": True,
	        "thresholding": False,
	        # NOTE(review): presumably turns the corrector off for step index 0
	        # only -- confirm against the scheduler documentation.
	        "disable_corrector": [0],
	    }
	)

	class Model:
	    """A selectable checkpoint together with its (initially empty) pipelines.

	    Attributes are plain and public:
	      name     -- label used to identify the model
	      path     -- location string the weights are loaded from
	      prefix   -- text that the generation functions prepend to the prompt
	      pipe_t2i -- text-to-image pipeline, ``None`` until assigned
	      pipe_i2i -- image-to-image pipeline, ``None`` until assigned
	    """

	    def __init__(self, name, path, prefix):
	        # Caller-supplied identity data.
	        self.name, self.path, self.prefix = name, path, prefix
	        # Pipelines are attached after construction.
	        self.pipe_t2i = self.pipe_i2i = None

	# Every checkpoint offered by the demo, as (name, path, prefix) triples that
	# are expanded into Model instances in the order listed.
	models = [
	    Model(*spec)
	    for spec in (
	        ("Stable-Diffusion-XL", "stabilityai/stable-diffusion-xl-base-1.0", "The XL version of official stable-diffusion"),
	        ("Stable-Diffusion-v1.4", "CompVis/stable-diffusion-v1-4", "The 1.4 version of official stable-diffusion"),
	        ("Waifu", "hakurei/waifu-diffusion", "anime style"),
	    )
	]

	# Module-level selection state: the first entry starts out as the active
	# model, and text-to-image as the active mode.
	current_model = models[0]
	current_model_path = current_model.path
	last_mode = "txt2img"

	# Optional Hugging Face Hub token read from the environment (None when unset).
	auth_token = os.getenv("HUGGING_FACE_HUB_TOKEN")

	_use_cuda = torch.cuda.is_available()
	print(f"Is CUDA available: {_use_cuda}")

	# Half precision is requested only on GPU; on CPU no dtype is passed at all.
	_dtype_kwargs = {"torch_dtype": torch.float16} if _use_cuda else {}


	def _load_model_pipelines(model):
	    """Load the txt2img and img2img pipelines for ``model`` in place.

	    Models whose name contains "XL" use the SDXL pipeline classes, all others
	    the regular Stable Diffusion classes. The VAE and UNet are loaded once
	    from the model's own repository and shared by both pipelines.

	    Raises whatever the underlying ``from_pretrained`` calls raise; on failure
	    ``model.pipe_t2i`` / ``model.pipe_i2i`` may be left partially assigned.
	    """
	    if "XL" in model.name:
	        pipe_cls, pipe_i2i_cls = StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
	    else:
	        pipe_cls, pipe_i2i_cls = StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

	    common = dict(use_auth_token=auth_token, **_dtype_kwargs)
	    # Each checkpoint gets its own VAE: the previous code loaded one VAE from
	    # the XL repository and passed it to every pipeline, including the
	    # non-XL ones.
	    model_vae = AutoencoderKL.from_pretrained(model.path, subfolder="vae", **common)
	    unet = UNet2DConditionModel.from_pretrained(model.path, subfolder="unet", **common)
	    model.pipe_t2i = pipe_cls.from_pretrained(model.path, unet=unet, vae=model_vae, scheduler=scheduler, **common)
	    model.pipe_i2i = pipe_i2i_cls.from_pretrained(model.path, unet=unet, vae=model_vae, scheduler=scheduler, **common)


	# Build a new list instead of calling models.remove() inside the loop:
	# removing from a list while iterating over it skips the following entry.
	_loaded_models = []
	for model in models:
	    try:
	        _load_model_pipelines(model)
	    except Exception as err:  # start-up boundary: report the failure, keep going
	        print(f"Skipping model {model.name!r}: could not load pipelines ({err!r})")
	    else:
	        _loaded_models.append(model)
	models[:] = _loaded_models

	if not models:
	    raise RuntimeError("None of the configured models could be loaded.")

	# The initially selected model may have failed to load; keep the selection
	# state consistent with the pipeline that is actually activated below.
	current_model = models[0]
	current_model_path = current_model.path

	pipe = current_model.pipe_t2i
	pipe = pipe.to("cuda" if _use_cuda else "cpu")
	# Kept so that the module still exposes a ``vae`` name as before.
	vae = pipe.vae

	device = "GPU 🔥" if _use_cuda else "CPU 🥶"

	def inference(model_name, prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
	    """Entry point wired to the UI: pick the model and dispatch to a generator.

	    Args:
	        model_name: Name of the model to use. If it matches no entry of
	            ``models`` the currently selected model is kept.
	        prompt: Text prompt.
	        guidance: Guidance scale forwarded to the pipeline.
	        steps: Number of inference steps.
	        width, height: Requested output size (txt2img) or bounding size used
	            to scale the input image (img2img).
	        seed: Random seed; ``0`` means "no fixed seed" (no generator is passed).
	        img: Optional input image. When given, image-to-image is used.
	        strength: Image-to-image transformation strength.
	        neg_prompt: Negative prompt.

	    Returns:
	        Whatever ``img_to_img`` / ``txt_to_img`` return.
	    """
	    global current_model
	    for model in models:
	        if model.name == model_name:
	            current_model = model
	    # Resolved after the loop so it is always bound, even for an unknown name.
	    model_path = current_model.path

	    # torch's manual_seed needs an int; UI widgets may deliver a float.
	    seed = int(seed)
	    if seed != 0:
	        generator_device = 'cuda' if torch.cuda.is_available() else 'cpu'
	        generator = torch.Generator(generator_device).manual_seed(seed)
	    else:
	        generator = None

	    if img is not None:
	        return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
	    return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator)

	def txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator=None):
	    """Run the current model's text-to-image pipeline and return one image.

	    If the requested ``model_path`` differs from the active one, or the last
	    call was not text-to-image, the active pipeline is first moved to the CPU
	    and replaced by ``current_model.pipe_t2i`` (moved to CUDA when available).

	    Returns the value of ``replace_nsfw_images`` applied to the pipeline output.
	    """
	    global last_mode, pipe, current_model_path

	    already_active = model_path == current_model_path and last_mode == "txt2img"
	    if not already_active:
	        current_model_path = model_path

	        # Park the outgoing pipeline on the CPU before activating the new one.
	        pipe.to("cpu")
	        pipe = current_model.pipe_t2i
	        if torch.cuda.is_available():
	            pipe = pipe.to("cuda")

	        last_mode = "txt2img"

	    # NOTE(review): the prefix is concatenated directly; no separator is
	    # inserted between it and the user's prompt here.
	    full_prompt = current_model.prefix + prompt
	    call_kwargs = {
	        "negative_prompt": neg_prompt,
	        "num_inference_steps": int(steps),
	        "guidance_scale": guidance,
	        "width": width,
	        "height": height,
	        "generator": generator,
	    }
	    result = pipe(full_prompt, **call_kwargs)

	    return replace_nsfw_images(result)

	def img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator=None):
	    """Run the current model's image-to-image pipeline and return one image.

	    If the requested ``model_path`` differs from the active one, or the last
	    call was not image-to-image, the active pipeline is first moved to the CPU
	    and replaced by ``current_model.pipe_i2i`` (moved to CUDA when available).

	    The input image is rescaled, keeping its aspect ratio, by the smaller of
	    ``height / img.height`` and ``width / img.width``; ``width`` and ``height``
	    are not forwarded to the pipeline itself.

	    Returns the value of ``replace_nsfw_images`` applied to the pipeline output.
	    """
	    global last_mode, pipe, current_model_path

	    needs_switch = model_path != current_model_path or last_mode != "img2img"
	    if needs_switch:
	        current_model_path = model_path

	        # Park the outgoing pipeline on the CPU before activating the new one.
	        pipe.to("cpu")
	        pipe = current_model.pipe_i2i
	        if torch.cuda.is_available():
	            pipe = pipe.to("cuda")

	        last_mode = "img2img"

	    # NOTE(review): the prefix is concatenated directly; no separator is
	    # inserted between it and the user's prompt here.
	    full_prompt = current_model.prefix + prompt

	    scale = min(height / img.height, width / img.width)
	    target_size = (int(img.width * scale), int(img.height * scale))
	    img = img.resize(target_size, Image.LANCZOS)

	    call_kwargs = {
	        "negative_prompt": neg_prompt,
	        "image": img,
	        "num_inference_steps": int(steps),
	        "strength": strength,
	        "guidance_scale": guidance,
	        "generator": generator,
	    }
	    result = pipe(full_prompt, **call_kwargs)

	    return replace_nsfw_images(result)

	def replace_nsfw_images(results):
	    """Swap flagged images for the ``nsfw.png`` placeholder; return the first image.

	    Args:
	        results: Pipeline output exposing ``images`` (a mutable sequence) and,
	            optionally, ``nsfw_content_detected`` (a sequence of flags that is
	            index-aligned with ``images``). A missing or ``None`` flag
	            attribute is treated as "nothing flagged".

	    Returns:
	        ``results.images[0]`` after any replacements.

	    Raises:
	        IndexError: if ``results.images`` is empty.
	    """
	    flags = getattr(results, "nsfw_content_detected", None) or []
	    # zip() stops at the shorter sequence, so a flag list that is shorter or
	    # longer than the image list can never index out of range.
	    for index, flagged in zip(range(len(results.images)), flags):
	        if not flagged:
	            continue
	        try:
	            results.images[index] = Image.open("nsfw.png")
	        except OSError as err:
	            # Best effort: keep the pipeline's own image if the placeholder
	            # file is missing or unreadable, but say so instead of hiding it.
	            print(f"Could not load NSFW placeholder image: {err!r}")
	    return results.images[0]

	# Stylesheet text passed to gr.Blocks(css=...) when the UI is built.
	# NOTE(review): the rules are wrapped in <style> tags inside the string --
	# confirm whether the `css` argument expects raw CSS without that wrapper.
	css = """
	<style>
	.finetuned-diffusion-div {
	text-align: center;
	max-width: 700px;
	margin: 0 auto;
	font-family: 'IBM Plex Sans', sans-serif;
	}
	.finetuned-diffusion-div div {
	display: inline-flex;
	align-items: center;
	gap: 0.8rem;
	font-size: 1.75rem;
	}
	.finetuned-diffusion-div div h1 {
	font-weight: 900;
	margin-top: 15px;
	margin-bottom: 15px;
	text-align: center;
	line-height: 150%;
	}
	.finetuned-diffusion-div p {
	margin-bottom: 10px;
	font-size: 94%;
	}
	.finetuned-diffusion-div p a {
	text-decoration: underline;
	}
	.tabs {
	margin-top: 0px;
	margin-bottom: 0px;
	}
	#gallery {
	min-height: 20rem;
	}
	.container {
	max-width: 1000px;
	margin: auto;
	padding-top: 1.5rem;
	}
	</style>
	"""
	# Gradio UI: a header, a left column (model choice, prompt, output image) and
	# a right column (generation options and the optional image-to-image input).
	with gr.Blocks(css=css) as demo:
	    # Header. The model list in the description now also names the XL model,
	    # which is the first (default) entry offered in the dropdown.
	    gr.HTML(
	        f"""
	<div class="finetuned-diffusion-div">
	<div>
	<h1>Stable-Diffusion with UniPC</h1>
	</div>
	<br>
	<p>
	❤️ Acknowledgement: Hardware resources of this demo are supported by HuggingFace 🤗 . Many thanks for the help!
	</p>
	<br>
	<p>
	This is a demo of sampling by UniPC with several Stable Diffusion models, including <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0">Stable-Diffusion-XL</a>, <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4">Stable-Diffusion-v1.4</a> and <a href="https://huggingface.co/hakurei/waifu-diffusion">Waifu</a>.
	</p>
	<br>
	<p>
	<a href="https://github.com/wl-zhao/UniPC">UniPC</a> is a training-free framework designed for the fast sampling of diffusion models, which consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders.
	</p>
	<p>
	We use <a href="https://github.com/huggingface/diffusers">Diffusers</a> 🧨 to implement this demo, which currently supports the multistep UniPC scheduler. For more details of UniPC with Diffusers, check <a href="https://github.com/huggingface/diffusers/pull/2373">this pull request</a>.
	</p>
	<br>
	<br>
	<p>
	Running on <b>{device}</b>
	</p>
	</div>
	"""
	    )

	    with gr.Row():

	        # Left column: model selection, prompt entry and the generated image.
	        with gr.Column(scale=55):
	            with gr.Group():
	                model_name = gr.Dropdown(label="Model", choices=[m.name for m in models], value=current_model.name)
	                with gr.Row():
	                    prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder="Enter prompt. Style applied automatically").style(container=False)
	                    generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))

	                image_out = gr.Image(height=512)

	        # Right column: sampling options and the image-to-image inputs.
	        with gr.Column(scale=45):
	            with gr.Tab("Options"):
	                with gr.Group():
	                    neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")

	                    with gr.Row():
	                        guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
	                        steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=100, step=1)

	                    with gr.Row():
	                        width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
	                        height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)

	                    seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)

	            with gr.Tab("Image to image"):
	                with gr.Group():
	                    image = gr.Image(label="Image", height=256, tool="editor", type="pil")
	                    strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

	    # The order of this list must match the positional parameters of inference().
	    inputs = [model_name, prompt, guidance, steps, width, height, seed, image, strength, neg_prompt]
	    # Both pressing Enter in the prompt box and clicking the button generate.
	    prompt.submit(inference, inputs=inputs, outputs=image_out)
	    generate.click(inference, inputs=inputs, outputs=image_out)

	    gr.Markdown('''
	Stable-diffusion Models by [CompVis](https://huggingface.co/CompVis) and [stabilityai](https://huggingface.co/stabilityai), Waifu-diffusion models by [@hakurei](https://huggingface.co/hakurei). Most of the code of this demo is copied from [@anzorq's finetuned-diffusion](https://huggingface.co/spaces/anzorq/finetuned_diffusion/tree/main) ❤️<br>
	Space by [Wenliang Zhao](https://github.com/wl-zhao).

	![visitors](https://visitor-badge.glitch.me/badge?page_id=wl-zhao.unipc_sdm)
	''')

	# NOTE(review): the generation functions swap a module-level pipeline, so the
	# queue is presumably limited to one worker on purpose -- confirm before raising.
	demo.queue(concurrency_count=1)
	demo.launch(debug=False, share=False)