rbanfield committed on
Commit 05c6bed
1 Parent(s): c28312e

Upload folder using huggingface_hub

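The commit message refers to the standard huggingface_hub folder-upload flow. As a rough, hypothetical sketch of how a commit like this is typically produced (the folder path and repo id below are placeholders, not taken from this commit):

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path=".",                 # local folder containing app.py
    repo_id="user/space-name",       # placeholder repo id
    repo_type="space",               # assuming a Space; could also be a model repo
    commit_message="Upload folder using huggingface_hub",
)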
Files changed (2)
  1. .app.py.swp +0 -0
  2. app.py +234 -57
.app.py.swp ADDED
Binary file (16.4 kB).
 
app.py CHANGED
@@ -13,23 +13,43 @@ import math
 import io
 from PIL import Image
 
-from diffusers import AutoencoderKL, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler,StableDiffusionControlNetImg2ImgPipeline,StableDiffusionXLControlNetPipeline,DiffusionPipeline
+from diffusers import (
+    AutoencoderKL,
+    StableDiffusionControlNetPipeline,
+    ControlNetModel,
+    UniPCMultistepScheduler,
+    StableDiffusionControlNetImg2ImgPipeline,
+    StableDiffusionXLControlNetPipeline,
+    DiffusionPipeline,
+)
 from diffusers.utils import load_image
 from transformers import pipeline
 
 import gradio as gr
 
-vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
+vae = AutoencoderKL.from_pretrained(
+    "stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
+)
 
 
-canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
+canny_controlnet = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
+)
 canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
-    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
+    "SG161222/Realistic_Vision_V3.0_VAE",
+    controlnet=canny_controlnet,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
 )
 
-canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
+canny_controlnet_tile = ControlNetModel.from_pretrained(
+    "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
+)
 canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
+    "SG161222/Realistic_Vision_V3.0_VAE",
+    controlnet=canny_controlnet_tile,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
 )
 canny_pipe_img2img.enable_model_cpu_offload()
 canny_pipe_img2img.enable_xformers_memory_efficient_attention()
@@ -40,10 +60,11 @@ canny_pipe.enable_model_cpu_offload()
 canny_pipe.enable_xformers_memory_efficient_attention()
 
 controlnet_xl = ControlNetModel.from_pretrained(
-    "diffusers/controlnet-canny-sdxl-1.0",
-    torch_dtype=torch.float16
+    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
+)
+vae_xl = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
 )
-vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
 pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0",
     controlnet=controlnet_xl,
@@ -67,62 +88,100 @@ refiner = DiffusionPipeline.from_pretrained(
 refiner.enable_xformers_memory_efficient_attention()
 refiner.enable_model_cpu_offload()
 
+
 def resize_image_output(im, width, height):
-    im = np.array(im)
-    newSize = (width,height)
+    im = np.array(im)
+    newSize = (width, height)
     img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
     img = Image.fromarray(img)
     return img
 
-def resize_image(im, max_size = 590000):
-    [x,y,z] = im.shape
-    new_size = [0,0]
+
+def resize_image(im, max_size=590000):
+    [x, y, z] = im.shape
+    new_size = [0, 0]
 
-
     min_size = 262144
-    if x*y > max_size:
-        scale_ratio = math.sqrt((x*y)/max_size)
+    if x * y > max_size:
+        scale_ratio = math.sqrt((x * y) / max_size)
         new_size[0] = int(x / scale_ratio)
         new_size[1] = int(y / scale_ratio)
-    elif x*y <= min_size:
-        scale_ratio = math.sqrt((x*y)/min_size)
+    elif x * y <= min_size:
+        scale_ratio = math.sqrt((x * y) / min_size)
         new_size[0] = int(x / scale_ratio)
        	new_size[1] = int(y / scale_ratio)
     else:
        	new_size[0] = int(x)
        	new_size[1] = int(y)
-
+
     height = (new_size[0] // 8) * 8
     width = (new_size[1] // 8) * 8
-
-    newSize = (width,height)
+
+    newSize = (width, height)
     img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
     return img
 
-def process_canny_tile(input_image,control_image, x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength_conditioning, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny_tile(
+    input_image,
+    control_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength_conditioning,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
 
     return canny_pipe_img2img(
-        prompt = '',
+        prompt="",
         image=image,
-        control_image = image,
+        control_image=image,
         num_inference_steps=20,
         guidance_scale=4,
-        strength = 0.3,
-        guess_mode = True,
+        strength=0.3,
+        guess_mode=True,
         negative_prompt=n_prompt,
         num_images_per_prompt=1,
        	eta=eta,
-        generator=torch.Generator(device="cpu").manual_seed(seed)
+        generator=torch.Generator(device="cpu").manual_seed(seed),
     )
 
-def process_canny(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny(
+    input_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
 
     return canny_pipe(
-        prompt=','.join([prompt,a_prompt]),
+        prompt=",".join([prompt, a_prompt]),
        	image=image,
        	height=x,
        	width=y,
@@ -132,15 +191,33 @@ def process_canny(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
        	num_images_per_prompt=num_samples,
        	eta=eta,
        	controlnet_conditioning_scale=strength,
-        generator=torch.Generator(device="cpu").manual_seed(seed)
+        generator=torch.Generator(device="cpu").manual_seed(seed),
     )
 
-def process_canny_sdxl(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
+
+def process_canny_sdxl(
+    input_image,
+    x,
+    y,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
 
     image = input_image
-
+
     image = pipe_xl(
-        prompt=','.join([prompt,a_prompt]),
        	image=image,
+        prompt=",".join([prompt, a_prompt]),
        	height=x,
        	width=y,
@@ -151,31 +228,87 @@ def process_canny_sdxl(input_image,x ,y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):
        	eta=eta,
        	controlnet_conditioning_scale=strength,
        	generator=torch.Generator(device="cpu").manual_seed(seed),
-        output_type="latent"
+        output_type="latent",
     ).images
-
+
     return refiner(
-        prompt=prompt,
-        num_inference_steps=ddim_steps,
-        num_images_per_prompt=num_samples,
-        denoising_start=0.8,
-        image=image,
+        prompt=prompt,
+        num_inference_steps=ddim_steps,
+        num_images_per_prompt=num_samples,
+        denoising_start=0.8,
+        image=image,
     )
 
 
-def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
+def process(
+    image,
+    prompt,
+    a_prompt,
+    n_prompt,
+    ddim_steps,
+    strength,
+    scale,
+    seed,
+    eta,
+    low_threshold,
+    high_threshold,
+):
     image = load_image(image)
     image = np.array(image)
-    [x_orig,y_orig,z_orig] = image.shape
+    [x_orig, y_orig, z_orig] = image.shape
     image = resize_image(image)
-    [x,y,z] = image.shape
+    [x, y, z] = image.shape
 
     image = cv2.Canny(image, low_threshold, high_threshold)
     image = image[:, :, None]
     image = np.concatenate([image, image, image], axis=2)
     image = Image.fromarray(image)
 
-    return process_canny(image,x,y, prompt, a_prompt, n_prompt, 1, None, ddim_steps, False, float(strength), scale, seed, eta, low_threshold, high_threshold)[0]
+    result = process_canny(
+        image,
+        x,
+        y,
+        prompt,
+        a_prompt,
+        n_prompt,
+        1,
+        None,
+        ddim_steps,
+        False,
+        float(strength),
+        scale,
+        seed,
+        eta,
+        low_threshold,
+        high_threshold,
+    )
+
+    im = result.images[0]
+    im = resize_image_output(im, y_orig, x_orig)
+    highres = False
+    if highres:
+        result_upscaled = process_canny_tile(
+            im,
+            im,
+            x_orig,
+            y_orig,
+            prompt,
+            a_prompt,
+            n_prompt,
+            num_samples,
+            None,
+            ddim_steps,
+            False,
+            strength,
+            scale,
+            seed,
+            eta,
+            low_threshold,
+            high_threshold,
+        )
+        im = result_upscaled.images[0]
+
+    return im
 
 
 demo = gr.Blocks().queue()
@@ -190,22 +323,66 @@ with demo:
             input_prompt = gr.Textbox()
             run_button = gr.Button(label="Run")
 
-            with gr.Accordion("Advanced Options"):
-                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
-                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
-                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
-                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
-                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1) # default value was 9.0
-                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
+            with gr.Accordion("Advanced Options", open=False):
+                strength = gr.Slider(
+                    label="Control Strength",
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.01,
+                )
+                low_threshold = gr.Slider(
+                    label="Canny low threshold",
+                    minimum=1,
+                    maximum=255,
+                    value=100,
+                    step=1,
+                )
+                high_threshold = gr.Slider(
+                    label="Canny high threshold",
+                    minimum=1,
+                    maximum=255,
+                    value=200,
+                    step=1,
+                )
+                ddim_steps = gr.Slider(
+                    label="Steps", minimum=1, maximum=100, value=20, step=1
+                )
+                scale = gr.Slider(
+                    label="Guidance Scale",
+                    minimum=0.1,
+                    maximum=30.0,
+                    value=7.5,
+                    step=0.1,
+                )  # default value was 9.0
+                seed = gr.Slider(
+                    label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True
+                )
                 eta = gr.Number(label="eta (DDIM)", value=0.0)
-                a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
-                n_prompt = gr.Textbox(label="Negative Prompt",
-                    value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
+                a_prompt = gr.Textbox(
+                    label="Added Prompt", value="best quality, extremely detailed"
+                )
+                n_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+                )
 
         with gr.Column():
-            result = gr.outputs.Image(label='Output', type="numpy")
-
-    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
+            result = gr.Image(label="Output", type="numpy")
+
+    ips = [
+        input_image,
+        input_prompt,
+        a_prompt,
+        n_prompt,
+        ddim_steps,
+        strength,
+        scale,
+        seed,
+        eta,
+        low_threshold,
+        high_threshold,
+    ]
    	run_button.click(fn=process, inputs=ips, outputs=[result])
 
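For orientation, the refactored process() entry point shown in this diff can be exercised outside Gradio roughly as follows. This is a hypothetical smoke test: the file name, prompts, and parameter values are placeholders, and a CUDA GPU with xformers is assumed for the fp16 pipelines.

# Hypothetical call; every literal below is a placeholder.
out = process(
    "input.jpg",                           # image path or URL accepted by load_image
    "a photo of a modern living room",     # prompt
    "best quality, extremely detailed",    # a_prompt
    "longbody, lowres, bad anatomy",       # n_prompt
    20,                                    # ddim_steps
    1.0,                                   # strength (ControlNet conditioning scale)
    7.5,                                   # scale (guidance)
    12345,                                 # seed
    0.0,                                   # eta
    100,                                   # low_threshold
    200,                                   # high_threshold
)
out.save("output.png")  # process() returns a PIL.Image resized back to the original size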