#!/usr/bin/env python
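"""Gradio demo: control Stable Diffusion with Canny edge maps.

Loads ControlNet (Canny) pipelines for SD 1.5 and SDXL, plus a tile-based
img2img pipeline for an optional high-res pass, then serves a Gradio UI.
"""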
import math

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import (
AutoencoderKL,
StableDiffusionControlNetPipeline,
ControlNetModel,
UniPCMultistepScheduler,
StableDiffusionControlNetImg2ImgPipeline,
StableDiffusionXLControlNetPipeline,
DiffusionPipeline,
)
from diffusers.utils import load_image
import gradio as gr
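
# ---------------------------------------------------------------------------
# Model setup (all weights in fp16).
# NOTE: this VAE is loaded but never attached to any pipeline below.
# ---------------------------------------------------------------------------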
vae = AutoencoderKL.from_pretrained(
"stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
)
canny_controlnet = ControlNetModel.from_pretrained(
"lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
"SG161222/Realistic_Vision_V3.0_VAE",
controlnet=canny_controlnet,
torch_dtype=torch.float16,
use_safetensors=True,
)
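
# Tile ControlNet + img2img pipeline, used for the optional high-res pass.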
canny_controlnet_tile = ControlNetModel.from_pretrained(
"lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
"SG161222/Realistic_Vision_V3.0_VAE",
controlnet=canny_controlnet_tile,
torch_dtype=torch.float16,
use_safetensors=True,
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()
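
# SDXL base pipeline with the Canny ControlNet and the fp16-safe SDXL VAE.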
controlnet_xl = ControlNetModel.from_pretrained(
"diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
vae_xl = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
controlnet=controlnet_xl,
vae=vae_xl,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()
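
# SDXL refiner; shares the base pipeline's second text encoder and VAE.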
refiner = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
text_encoder_2=pipe_xl.text_encoder_2,
vae=pipe_xl.vae,
torch_dtype=torch.float16,
use_safetensors=True,
variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()
def resize_image_output(im, width, height):
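    """Resize a PIL image to (width, height) with cubic interpolation."""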
im = np.array(im)
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
    return Image.fromarray(img)
def resize_image(im, max_size=590000):
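    """Clamp the image's pixel area to [512*512, max_size] and snap both
    dimensions to multiples of 8, as the SD pipelines require."""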
[x, y, z] = im.shape
new_size = [0, 0]
min_size = 262144
if x * y > max_size:
scale_ratio = math.sqrt((x * y) / max_size)
new_size[0] = int(x / scale_ratio)
new_size[1] = int(y / scale_ratio)
elif x * y <= min_size:
scale_ratio = math.sqrt((x * y) / min_size)
new_size[0] = int(x / scale_ratio)
new_size[1] = int(y / scale_ratio)
else:
new_size[0] = int(x)
new_size[1] = int(y)
height = (new_size[0] // 8) * 8
width = (new_size[1] // 8) * 8
    img = cv2.resize(im, (width, height), interpolation=cv2.INTER_CUBIC)
return img
def process_canny_tile(
input_image,
control_image,
x,
y,
prompt,
a_prompt,
n_prompt,
num_samples,
image_resolution,
ddim_steps,
guess_mode,
strength_conditioning,
scale,
seed,
eta,
low_threshold,
high_threshold,
):
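    """Tile-ControlNet img2img pass for refining a generated image.

    Most parameters exist for interface parity with the other process_*
    functions and are ignored; the pass runs with fixed settings.
    """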
    return canny_pipe_img2img(
        prompt="",  # unconditioned; guess_mode derives conditioning from the control image
        image=input_image,
        control_image=control_image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    )
def process_canny(
input_image,
x,
y,
prompt,
a_prompt,
n_prompt,
num_samples,
image_resolution,
ddim_steps,
guess_mode,
strength,
scale,
seed,
eta,
low_threshold,
high_threshold,
):
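    """Text-to-image with the SD 1.5 Canny ControlNet pipeline."""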
    return canny_pipe(
        prompt=", ".join([prompt, a_prompt]),
        image=input_image,
height=x,
width=y,
num_inference_steps=ddim_steps,
guidance_scale=scale,
negative_prompt=n_prompt,
num_images_per_prompt=num_samples,
eta=eta,
controlnet_conditioning_scale=strength,
generator=torch.Generator(device="cpu").manual_seed(seed),
)
def process_canny_sdxl(
input_image,
x,
y,
prompt,
a_prompt,
n_prompt,
num_samples,
image_resolution,
ddim_steps,
guess_mode,
strength,
scale,
seed,
eta,
low_threshold,
high_threshold,
):
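    """Text-to-image with the SDXL Canny ControlNet base, handing the latents
    to the refiner at denoising_start=0.8. Not wired into the UI below.
    """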
    image = pipe_xl(
        prompt=", ".join([prompt, a_prompt]),
        image=input_image,
height=x,
width=y,
num_inference_steps=ddim_steps,
guidance_scale=scale,
negative_prompt=n_prompt,
num_images_per_prompt=num_samples,
eta=eta,
controlnet_conditioning_scale=strength,
generator=torch.Generator(device="cpu").manual_seed(seed),
output_type="latent",
).images
return refiner(
prompt=prompt,
num_inference_steps=ddim_steps,
num_images_per_prompt=num_samples,
denoising_start=0.8,
image=image,
)
def process(
image,
prompt,
a_prompt,
n_prompt,
ddim_steps,
strength,
scale,
seed,
eta,
low_threshold,
high_threshold,
):
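    """Gradio handler: load the input, resize it, extract Canny edges, run the
    SD 1.5 ControlNet pipeline, and resize the result back to the input size.
    """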
image = load_image(image)
image = np.array(image)
[x_orig, y_orig, z_orig] = image.shape
image = resize_image(image)
[x, y, z] = image.shape
    image = cv2.Canny(image, low_threshold, high_threshold)
    # Stack the single-channel edge map into a 3-channel condition image.
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)
result = process_canny(
image,
x,
y,
prompt,
a_prompt,
n_prompt,
1,
None,
ddim_steps,
False,
float(strength),
scale,
seed,
eta,
low_threshold,
high_threshold,
)
im = result.images[0]
im = resize_image_output(im, y_orig, x_orig)
    # Optional tile-based high-res refinement pass (disabled by default).
    highres = False
    if highres:
result_upscaled = process_canny_tile(
im,
im,
x_orig,
y_orig,
prompt,
a_prompt,
n_prompt,
            1,  # num_samples
None,
ddim_steps,
False,
strength,
scale,
seed,
eta,
low_threshold,
high_threshold,
)
im = result_upscaled.images[0]
return im
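
# --- Gradio UI ---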
demo = gr.Blocks().queue()
with demo:
with gr.Row():
gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")
with gr.Row():
with gr.Column():
input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button("Run")
with gr.Accordion("Advanced Options", open=False):
strength = gr.Slider(
label="Control Strength",
minimum=0.0,
maximum=2.0,
value=1.0,
step=0.01,
)
low_threshold = gr.Slider(
label="Canny low threshold",
minimum=1,
maximum=255,
value=100,
step=1,
)
high_threshold = gr.Slider(
label="Canny high threshold",
minimum=1,
maximum=255,
value=200,
step=1,
)
ddim_steps = gr.Slider(
label="Steps", minimum=1, maximum=100, value=20, step=1
)
scale = gr.Slider(
label="Guidance Scale",
minimum=0.1,
maximum=30.0,
value=7.5,
step=0.1,
) # default value was 9.0
seed = gr.Slider(
label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True
)
eta = gr.Number(label="eta (DDIM)", value=0.0)
a_prompt = gr.Textbox(
label="Added Prompt", value="best quality, extremely detailed"
)
n_prompt = gr.Textbox(
label="Negative Prompt",
value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
)
with gr.Column():
            result = gr.Image(label="Output", type="pil")
ips = [
input_image,
input_prompt,
a_prompt,
n_prompt,
ddim_steps,
strength,
scale,
seed,
eta,
low_threshold,
high_threshold,
]
run_button.click(fn=process, inputs=ips, outputs=[result])
demo.launch()