unipc_sdm / app.py
zwl
fix bug
4d1940c
from diffusers import AutoencoderKL, UNet2DConditionModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, UniPCMultistepScheduler
from diffusers import StableDiffusionXLPipeline
from diffusers import StableDiffusionXLImg2ImgPipeline
import gradio as gr
import torch
from PIL import Image
import os
# Single UniPC multistep scheduler instance, passed to every pipeline this
# module builds.
scheduler = UniPCMultistepScheduler(
    # Beta schedule.
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    trained_betas=None,
    # Solver options.
    solver_type="bh2",
    predict_x0=True,
    thresholding=False,
    lower_order_final=True,
    disable_corrector=[0],
)
class Model:
    """A selectable checkpoint plus the two pipelines built for it.

    ``name`` is the label shown to the user, ``path`` is what gets passed to
    ``from_pretrained`` and ``prefix`` is text placed in front of every prompt.
    Both pipeline slots start out as ``None`` and are assigned after
    construction.
    """

    def __init__(self, name, path, prefix):
        self.name, self.path, self.prefix = name, path, prefix
        # Text-to-image and image-to-image pipelines; not built yet.
        self.pipe_t2i = self.pipe_i2i = None
# Checkpoints offered in the model dropdown; the first entry is the default.
# NOTE(review): `prefix` is prepended verbatim to every prompt by
# txt_to_img/img_to_img, yet these values read like descriptions rather than
# style trigger words - confirm that this is intended.
models = [
    Model(
        name="Stable-Diffusion-XL",
        path="stabilityai/stable-diffusion-xl-base-1.0",
        prefix="The XL version of official stable-diffusion",
    ),
    Model(
        name="Stable-Diffusion-v1.4",
        path="CompVis/stable-diffusion-v1-4",
        prefix="The 1.4 version of official stable-diffusion",
    ),
    Model(
        name="Waifu",
        path="hakurei/waifu-diffusion",
        prefix="anime style",
    ),
]

# Mutable module state read and updated by the inference helpers.
last_mode = "txt2img"
current_model = models[0]
current_model_path = current_model.path

# Hub access token taken from the environment (None when unset).
auth_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
use_cuda = torch.cuda.is_available()
print(f"Is CUDA available: {use_cuda}")

# Half precision is only requested on GPU; on CPU the library default is used.
_dtype_kwargs = {"torch_dtype": torch.float16} if use_cuda else {}


def _load_pipelines(model):
    """Build the txt2img and img2img pipelines for *model* and store them on it.

    The XL pipeline classes are chosen when the model name contains "XL",
    on CPU as well as on GPU; every other model uses the regular Stable
    Diffusion classes. The UNet and VAE are loaded once and shared by both
    pipelines of the same checkpoint.

    Raises:
        Exception: whatever ``from_pretrained`` raises; the caller decides
            whether the checkpoint is skipped.
    """
    if "XL" in model.name:
        t2i_cls, i2i_cls = StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
    else:
        t2i_cls, i2i_cls = StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

    common = dict(_dtype_kwargs, use_auth_token=auth_token)
    # A VAE is loaded per checkpoint rather than reusing the first model's:
    # the SD-XL and SD-1.x autoencoders are different weights and are not
    # interchangeable.
    model_vae = AutoencoderKL.from_pretrained(model.path, subfolder="vae", **common)
    unet = UNet2DConditionModel.from_pretrained(model.path, subfolder="unet", **common)

    shared = dict(common, unet=unet, vae=model_vae, scheduler=scheduler)
    model.pipe_t2i = t2i_cls.from_pretrained(model.path, **shared)
    model.pipe_i2i = i2i_cls.from_pretrained(model.path, **shared)


# Collect the survivors in a separate list: removing entries from `models`
# while iterating over it would silently skip the entry after each failure.
_loaded = []
for model in models:
    try:
        _load_pipelines(model)
    except Exception as exc:
        # Startup boundary: report the failure and keep serving the rest.
        print(f"Failed to load model {model.name!r} from {model.path!r}: {exc!r}")
    else:
        _loaded.append(model)

# Updated in place so every existing reference to `models` sees the result.
models[:] = _loaded
if not models:
    raise RuntimeError("No model could be loaded; cannot start the demo.")

# The default selection may have been dropped above; never leave
# `current_model` pointing at a checkpoint without pipelines.
if current_model not in models:
    current_model = models[0]
current_model_path = current_model.path

pipe = current_model.pipe_t2i
pipe = pipe.to("cuda" if use_cuda else "cpu")
device = "GPU 🔥" if use_cuda else "CPU 🥶"
def inference(model_name, prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Select the requested model, then run img2img or txt2img.

    A seed of 0 means no generator is passed on. When ``img`` is given the
    request goes to ``img_to_img``, otherwise to ``txt_to_img``; the return
    value is whatever that helper returns.
    """
    global current_model

    # The last entry with a matching name wins; an unknown name keeps the
    # current selection.
    matching = [candidate for candidate in models if candidate.name == model_name]
    if matching:
        current_model = matching[-1]
    model_path = current_model.path

    if seed == 0:
        generator = None
    else:
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        generator = torch.Generator(device_name).manual_seed(seed)

    if img is None:
        return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator)
    return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
def txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator=None):
    """Generate an image from text with the currently selected model.

    Args:
        model_path: path of the selected model; compared with the module-level
            ``current_model_path`` to decide whether to switch pipelines.
        prompt: user prompt; the selected model's prefix is put in front of it.
        neg_prompt: negative prompt forwarded to the pipeline.
        guidance: guidance scale forwarded to the pipeline.
        steps: number of inference steps (cast to int).
        width: output width (cast to int).
        height: output height (cast to int).
        generator: optional ``torch.Generator`` for reproducible sampling.

    Returns:
        The value returned by ``replace_nsfw_images`` for the pipeline output.
    """
    global last_mode
    global pipe
    global current_model_path

    # Swap the active pipeline only when the model or the mode has changed.
    if model_path != current_model_path or last_mode != "txt2img":
        current_model_path = model_path
        pipe.to("cpu")  # move the previously active pipeline off the GPU
        pipe = current_model.pipe_t2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")
        last_mode = "txt2img"

    # Keep the prefix and the prompt as separate words: a prefix without
    # trailing whitespace would otherwise fuse with the first prompt word.
    prefix = current_model.prefix or ""
    if prefix and not prefix[-1].isspace():
        prefix += " "
    prompt = prefix + prompt

    result = pipe(
        prompt,
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        # Cast like `steps`, in case the UI hands over floats.
        width=int(width),
        height=int(height),
        generator=generator,
    )
    return replace_nsfw_images(result)
def img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator=None):
    """Transform ``img`` according to the prompt with the selected model.

    Args:
        model_path: path of the selected model; compared with the module-level
            ``current_model_path`` to decide whether to switch pipelines.
        prompt: user prompt; the selected model's prefix is put in front of it.
        neg_prompt: negative prompt forwarded to the pipeline.
        img: PIL image used as the starting point.
        strength: transformation strength forwarded to the pipeline.
        guidance: guidance scale forwarded to the pipeline.
        steps: number of inference steps (cast to int).
        width: width of the box the input image is scaled to fit.
        height: height of the box the input image is scaled to fit.
        generator: optional ``torch.Generator`` for reproducible sampling.

    Returns:
        The value returned by ``replace_nsfw_images`` for the pipeline output.
    """
    global last_mode
    global pipe
    global current_model_path

    # Swap the active pipeline only when the model or the mode has changed.
    if model_path != current_model_path or last_mode != "img2img":
        current_model_path = model_path
        pipe.to("cpu")  # move the previously active pipeline off the GPU
        pipe = current_model.pipe_i2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")
        last_mode = "img2img"

    # Keep the prefix and the prompt as separate words: a prefix without
    # trailing whitespace would otherwise fuse with the first prompt word.
    prefix = current_model.prefix or ""
    if prefix and not prefix[-1].isspace():
        prefix += " "
    prompt = prefix + prompt

    # Scale the input to fit inside width x height, keeping its aspect ratio.
    # Each side is floored to a multiple of 8 (the step of the size sliders)
    # and never drops below 8, so an extreme aspect ratio cannot produce a
    # zero-sized image.
    ratio = min(height / img.height, width / img.width)
    new_width = max(8, int(img.width * ratio) // 8 * 8)
    new_height = max(8, int(img.height * ratio) // 8 * 8)
    img = img.resize((new_width, new_height), Image.LANCZOS)

    # width/height are not forwarded; the resized input image is passed instead.
    result = pipe(
        prompt,
        negative_prompt=neg_prompt,
        image=img,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        generator=generator,
    )
    return replace_nsfw_images(result)
def replace_nsfw_images(results):
    """Swap flagged images for the ``nsfw.png`` placeholder; return the first image.

    Args:
        results: pipeline output exposing ``images`` and, optionally,
            ``nsfw_content_detected`` (one flag per image, or ``None``).

    Returns:
        ``results.images[0]`` after any replacement.
    """
    # The flag list may be absent or None; treat both as "nothing flagged"
    # instead of relying on a swallowed exception.
    flags = getattr(results, "nsfw_content_detected", None) or []
    for idx, flagged in zip(range(len(results.images)), flags):
        if not flagged:
            continue
        try:
            results.images[idx] = Image.open("nsfw.png")
        except OSError as exc:
            # Best effort: a missing or unreadable placeholder must not fail
            # the whole request; the image is left as it was.
            print(f"Could not load NSFW placeholder image: {exc}")
    return results.images[0]
# Page styles handed to gr.Blocks(css=...) when the UI is built.
# NOTE(review): the text is wrapped in <style> tags; gr.Blocks presumably
# expects bare CSS rules - confirm that the wrapper is intended.
css = """
<style>
.finetuned-diffusion-div {
text-align: center;
max-width: 700px;
margin: 0 auto;
font-family: 'IBM Plex Sans', sans-serif;
}
.finetuned-diffusion-div div {
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
}
.finetuned-diffusion-div div h1 {
font-weight: 900;
margin-top: 15px;
margin-bottom: 15px;
text-align: center;
line-height: 150%;
}
.finetuned-diffusion-div p {
margin-bottom: 10px;
font-size: 94%;
}
.finetuned-diffusion-div p a {
text-decoration: underline;
}
.tabs {
margin-top: 0px;
margin-bottom: 0px;
}
#gallery {
min-height: 20rem;
}
.container {
max-width: 1000px;
margin: auto;
padding-top: 1.5rem;
}
</style>
"""
# Build the Gradio UI (header, prompt/output column, options column), wire
# the events to inference(), then start the app.
with gr.Blocks(css=css) as demo:
    # Static header; the only interpolated value is `device`.
    gr.HTML(
        f"""
<div class="finetuned-diffusion-div">
<div>
<h1>Stable-Diffusion with UniPC</h1>
</div>
<br>
<p>
❤️ Acknowledgement: Hardware resources of this demo are supported by HuggingFace 🤗 . Many thanks for the help!
</p>
<br>
<p>
This is a demo of sampling by UniPC with two variants of Stable Diffusion models, including <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4">Stable-Diffusion-v1.4</a> and <a href="https://huggingface.co/hakurei/waifu-diffusion">Waifu</a>.
</p>
<br>
<p>
<a href="https://github.com/wl-zhao/UniPC">UniPC</a> is a training-free framework designed for the fast sampling of diffusion models, which consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders.
</p>
<p>
We use <a href="https://github.com/huggingface/diffusers">Diffusers</a> 🧨 to implement this demo, which currently supports the multistep UniPC scheduler. For more details of UniPC with Diffusers, check <a href="https://github.com/huggingface/diffusers/pull/2373">this pull request</a>.
</p>
<br>
<br>
<p>
Running on <b>{device}</b>
</p>
</div>
"""
    )
    with gr.Row():
        # Left column: model choice, prompt box, generate button, output image.
        with gr.Column(scale=55):
            with gr.Group():
                model_name = gr.Dropdown(label="Model", choices=[m.name for m in models], value=current_model.name)
                with gr.Row():
                    prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder="Enter prompt. Style applied automatically").style(container=False)
                    generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))
                image_out = gr.Image(height=512)
                # gallery = gr.Gallery(
                # label="Generated images", show_label=False, elem_id="gallery"
                # ).style(grid=[1], height="auto")
        # Right column: sampling options and the optional img2img input.
        with gr.Column(scale=45):
            with gr.Tab("Options"):
                with gr.Group():
                    neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
                    # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)
                    with gr.Row():
                        guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
                        steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=100, step=1)
                    with gr.Row():
                        width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
                        height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)
                    # A seed of 0 makes inference() run without a generator.
                    seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
            with gr.Tab("Image to image"):
                with gr.Group():
                    # When an image is supplied, inference() takes the img2img path.
                    image = gr.Image(label="Image", height=256, tool="editor", type="pil")
                    strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)
    # model_name.change(lambda x: gr.update(visible = x == models[0].name), inputs=model_name, outputs=custom_model_group)
    # Order must match the positional parameters of inference().
    inputs = [model_name, prompt, guidance, steps, width, height, seed, image, strength, neg_prompt]
    # Submitting the prompt box and clicking the button trigger the same call.
    prompt.submit(inference, inputs=inputs, outputs=image_out)
    generate.click(inference, inputs=inputs, outputs=image_out)
    gr.Markdown('''
Stable-diffusion Models by [CompVis](https://huggingface.co/CompVis) and [stabilityai](https://huggingface.co/stabilityai), Waifu-diffusion models by [@hakurei](https://huggingface.co/hakurei). Most of the code of this demo are copied from [@anzorq's fintuned-diffusion](https://huggingface.co/spaces/anzorq/finetuned_diffusion/tree/main) ❤️<br>
Space by [Wenliang Zhao](https://github.com/wl-zhao).
![visitors](https://visitor-badge.glitch.me/badge?page_id=wl-zhao.unipc_sdm)
''')
# NOTE(review): presumably one request at a time because the inference
# helpers mutate module-level state (pipe, last_mode, current_model) - confirm.
demo.queue(concurrency_count=1)
demo.launch(debug=False, share=False)