Spaces:
Runtime error
Runtime error
#!/usr/bin/env python | |
import cv2 | |
import numpy as np | |
import torch | |
import random | |
import base64 | |
import json | |
import threading | |
import uuid | |
import math | |
import io | |
from PIL import Image | |
from diffusers import ( | |
AutoencoderKL, | |
StableDiffusionControlNetPipeline, | |
ControlNetModel, | |
UniPCMultistepScheduler, | |
StableDiffusionControlNetImg2ImgPipeline, | |
StableDiffusionXLControlNetPipeline, | |
DiffusionPipeline, | |
) | |
from diffusers.utils import load_image | |
from transformers import pipeline | |
import gradio as gr | |
# ---------------------------------------------------------------------------
# Model loading. Everything below runs at import time and is loaded with
# torch.float16 weights.
# ---------------------------------------------------------------------------

# Keyword arguments shared by the ``from_pretrained`` calls below.
_HALF_PRECISION = {"torch_dtype": torch.float16}
_PIPELINE_KWARGS = {"torch_dtype": torch.float16, "use_safetensors": True}

# Base Stable Diffusion 1.5 checkpoint used by both SD 1.5 pipelines.
_SD15_CHECKPOINT = "SG161222/Realistic_Vision_V3.0_VAE"

# NOTE(review): ``vae`` is not referenced again in the visible code --
# confirm whether it should be passed to a pipeline or can be dropped.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", **_HALF_PRECISION)

# --- SD 1.5 + canny ControlNet (text-to-image) ------------------------------
canny_controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny", **_HALF_PRECISION
)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    _SD15_CHECKPOINT, controlnet=canny_controlnet, **_PIPELINE_KWARGS
)

# --- SD 1.5 + tile ControlNet (image-to-image) ------------------------------
canny_controlnet_tile = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11f1e_sd15_tile", **_HALF_PRECISION
)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    _SD15_CHECKPOINT, controlnet=canny_controlnet_tile, **_PIPELINE_KWARGS
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()

# The text-to-image pipeline swaps its scheduler for UniPC, reusing the
# configuration of the scheduler it was loaded with.
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(
    canny_pipe.scheduler.config
)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# --- SDXL + canny ControlNet, followed by the SDXL refiner -------------------
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", **_HALF_PRECISION
)
vae_xl = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", **_HALF_PRECISION
)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    variant="fp16",
    **_PIPELINE_KWARGS,
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()

# The refiner reuses the second text encoder and the VAE of ``pipe_xl``
# instead of loading its own copies.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    variant="fp16",
    **_PIPELINE_KWARGS,
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()
def resize_image_output(im, width, height):
    """Resize *im* to ``width`` x ``height`` and return it as a PIL image.

    Args:
        im: Image-like object accepted by ``np.array`` (the caller in this
            file passes a PIL image).
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A ``PIL.Image.Image`` built from the bicubically resampled pixels.
    """
    pixels = np.array(im)
    # cv2.resize expects the target size as (width, height).
    resampled = cv2.resize(pixels, (width, height), interpolation=cv2.INTER_CUBIC)
    return Image.fromarray(resampled)
def resize_image(im, max_size=590000, min_size=262144):
    """Rescale *im* so its pixel count lies between ``min_size`` and ``max_size``.

    The aspect ratio is preserved (up to rounding): images with more than
    ``max_size`` pixels are shrunk, images with at most ``min_size`` pixels
    are enlarged, and everything in between keeps its scale. Both sides are
    then rounded down to a multiple of 8 and the image is resampled with
    bicubic interpolation.

    Args:
        im: NumPy image array whose first two axes are (rows, columns).
            Both 2-D and 3-D arrays are accepted.
        max_size: Largest allowed pixel count (rows * columns).
        min_size: Pixel count that small images are scaled up to.

    Returns:
        The resized NumPy array.

    Raises:
        ValueError: If the image has a zero-sized dimension.
    """
    rows, cols = im.shape[:2]
    area = rows * cols
    if area == 0:
        # The scale ratio below would be 0 and trigger a division by zero.
        raise ValueError("cannot resize an image with a zero-sized dimension")

    if area > max_size:
        scale_ratio = math.sqrt(area / max_size)
    elif area <= min_size:
        scale_ratio = math.sqrt(area / min_size)
    else:
        scale_ratio = 1.0

    # Round each side down to a multiple of 8, but never below 8: extremely
    # thin images could otherwise end up with a side of 0, which cv2.resize
    # rejects.
    height = max(8, (int(rows / scale_ratio) // 8) * 8)
    width = max(8, (int(cols / scale_ratio) // 8) * 8)

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
def process_canny_tile(
    input_image,
    control_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength_conditioning,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Run the tile-ControlNet image-to-image pipeline on ``input_image``.

    Only ``input_image``, ``n_prompt``, ``eta`` and ``seed`` influence the
    call. The remaining parameters are accepted so the signature lines up
    with the other ``process_*`` helpers, but the pipeline is driven with
    fixed settings: empty prompt, 20 steps, guidance scale 4, strength 0.3,
    guess mode on, one image per prompt.

    Returns:
        Whatever ``canny_pipe_img2img`` returns for the call.
    """
    rng = torch.Generator(device="cpu").manual_seed(seed)
    # The same image is used both as the img2img source and as the
    # ControlNet conditioning image; ``control_image`` is not consulted.
    fixed_settings = {
        "prompt": "",
        "num_inference_steps": 20,
        "guidance_scale": 4,
        "strength": 0.3,
        "guess_mode": True,
        "num_images_per_prompt": 1,
    }
    return canny_pipe_img2img(
        image=input_image,
        control_image=input_image,
        negative_prompt=n_prompt,
        eta=eta,
        generator=rng,
        **fixed_settings,
    )
def process_canny(
    input_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Run the SD 1.5 canny-ControlNet pipeline on a control image.

    Args:
        input_image: Control image handed to the pipeline as ``image``.
        x: Output height passed to the pipeline.
        y: Output width passed to the pipeline.
        prompt: Main prompt; joined with ``a_prompt`` by a comma.
        a_prompt: Additional prompt text appended to ``prompt``.
        n_prompt: Negative prompt.
        num_samples: Number of images per prompt.
        ddim_steps: Number of inference steps.
        strength: ControlNet conditioning scale.
        scale: Guidance scale.
        seed: Seed for the CPU ``torch.Generator``.
        eta: ``eta`` value forwarded to the pipeline.
        image_resolution, guess_mode, low_threshold, high_threshold:
            Accepted but not used by this function.

    Returns:
        Whatever ``canny_pipe`` returns for the call.
    """
    full_prompt = ",".join((prompt, a_prompt))
    rng = torch.Generator(device="cpu").manual_seed(seed)
    return canny_pipe(
        prompt=full_prompt,
        negative_prompt=n_prompt,
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        guidance_scale=scale,
        controlnet_conditioning_scale=strength,
        eta=eta,
        generator=rng,
    )
def process_canny_sdxl(
    input_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Run the SDXL canny-ControlNet pipeline, then the SDXL refiner.

    The first stage is called with ``output_type="latent"`` and its
    ``.images`` are passed to ``refiner`` as the ``image`` argument. The
    refiner receives only ``prompt`` (without ``a_prompt`` or the negative
    prompt) and is started at ``denoising_start=0.8``.

    ``image_resolution``, ``guess_mode``, ``low_threshold`` and
    ``high_threshold`` are accepted but not used.

    Returns:
        Whatever ``refiner`` returns for the call.
    """
    # NOTE(review): the first stage runs all ``ddim_steps`` while the refiner
    # starts at 0.8 -- confirm this split is intended.
    full_prompt = ",".join((prompt, a_prompt))
    rng = torch.Generator(device="cpu").manual_seed(seed)

    base_output = pipe_xl(
        prompt=full_prompt,
        negative_prompt=n_prompt,
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        guidance_scale=scale,
        controlnet_conditioning_scale=strength,
        eta=eta,
        generator=rng,
        output_type="latent",
    )

    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=base_output.images,
    )
def process(
    image,
    prompt,
    a_prompt,
    n_prompt,
    ddim_steps,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Gradio callback: generate an image guided by the input's canny edges.

    The input is loaded, rescaled with ``resize_image``, converted to a
    three-channel canny edge map and passed to ``process_canny``. The first
    generated image is resized back to the input's original dimensions.

    Args:
        image: Input image as delivered by the ``gr.Image`` component.
        prompt: Main text prompt.
        a_prompt: Additional prompt text appended to ``prompt``.
        n_prompt: Negative prompt.
        ddim_steps: Number of inference steps (cast to ``int``).
        strength: ControlNet conditioning scale (cast to ``float``).
        scale: Guidance scale.
        seed: Generator seed (cast to ``int``); ``-1`` selects a random seed.
        eta: ``eta`` value forwarded to the pipeline.
        low_threshold: Lower canny threshold.
        high_threshold: Upper canny threshold.

    Returns:
        The generated ``PIL.Image.Image`` at the input's original size.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # Slider values are normalised to the types the pipelines expect.
    num_samples = 1
    ddim_steps = int(ddim_steps)
    strength = float(strength)
    seed = int(seed)
    if seed == -1:
        # The seed slider allows -1; treat it as "pick a random seed".
        seed = random.randint(0, 2147483647)

    source = np.array(load_image(image))
    orig_height, orig_width = source.shape[:2]
    resized = resize_image(source)
    height, width = resized.shape[:2]

    # Build a three-channel control image from the single-channel edge map.
    edges = cv2.Canny(resized, low_threshold, high_threshold)
    control = Image.fromarray(np.stack([edges, edges, edges], axis=2))

    result = process_canny(
        control,
        height,
        width,
        prompt,
        a_prompt,
        n_prompt,
        num_samples,
        None,
        ddim_steps,
        False,
        strength,
        scale,
        seed,
        eta,
        low_threshold,
        high_threshold,
    )
    im = resize_image_output(result.images[0], orig_width, orig_height)

    # Optional second pass through the tile img2img pipeline; switched off.
    highres = False
    if highres:
        result_upscaled = process_canny_tile(
            im,
            im,
            orig_height,
            orig_width,
            prompt,
            a_prompt,
            n_prompt,
            num_samples,
            None,
            ddim_steps,
            False,
            strength,
            scale,
            seed,
            eta,
            low_threshold,
            high_threshold,
        )
        im = result_upscaled.images[0]
    return im
# ---------------------------------------------------------------------------
# Gradio UI: input image + prompt on the left, generated image on the right.
# ---------------------------------------------------------------------------
demo = gr.Blocks().queue()
with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            # The button text is given through ``value``; ``label`` is not a
            # documented ``gr.Button`` argument.
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=False):
                strength = gr.Slider(
                    label="Control Strength",
                    minimum=0.0,
                    maximum=2.0,
                    value=1.0,
                    step=0.01,
                )
                low_threshold = gr.Slider(
                    label="Canny low threshold",
                    minimum=1,
                    maximum=255,
                    value=100,
                    step=1,
                )
                high_threshold = gr.Slider(
                    label="Canny high threshold",
                    minimum=1,
                    maximum=255,
                    value=200,
                    step=1,
                )
                ddim_steps = gr.Slider(
                    label="Steps", minimum=1, maximum=100, value=20, step=1
                )
                scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=0.1,
                    maximum=30.0,
                    value=7.5,
                    step=0.1,
                )  # default value was 9.0
                seed = gr.Slider(
                    label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True
                )
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(
                    label="Added Prompt", value="best quality, extremely detailed"
                )
                n_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
                )
        with gr.Column():
            result = gr.Image(label="Output", type="numpy")
    # Order must match the positional parameters of ``process``.
    ips = [
        input_image,
        input_prompt,
        a_prompt,
        n_prompt,
        ddim_steps,
        strength,
        scale,
        seed,
        eta,
        low_threshold,
        high_threshold,
    ]
    run_button.click(fn=process, inputs=ips, outputs=[result])
demo.launch()