kadirnar committed on
Commit
a22dacf
1 Parent(s): b5fb0c0

Upload 25 files

Files changed (25)
  1. diffusion_webui/__init__.py +1 -0
  2. diffusion_webui/diffusion_models/__init__.py +0 -0
  3. diffusion_webui/diffusion_models/controlnet/__init__.py +0 -0
  4. diffusion_webui/diffusion_models/controlnet/controlnet_canny.py +183 -0
  5. diffusion_webui/diffusion_models/controlnet/controlnet_depth.py +187 -0
  6. diffusion_webui/diffusion_models/controlnet/controlnet_hed.py +181 -0
  7. diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/__init__.py +0 -0
  8. diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/controlnet_inpaint_app.py +203 -0
  9. diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/pipeline_stable_diffusion_controlnet_inpaint.py +607 -0
  10. diffusion_webui/diffusion_models/controlnet/controlnet_mlsd.py +173 -0
  11. diffusion_webui/diffusion_models/controlnet/controlnet_pose.py +189 -0
  12. diffusion_webui/diffusion_models/controlnet/controlnet_scribble.py +188 -0
  13. diffusion_webui/diffusion_models/controlnet/controlnet_seg.py +353 -0
  14. diffusion_webui/diffusion_models/stable_diffusion/__init__.py +0 -0
  15. diffusion_webui/diffusion_models/stable_diffusion/__pycache__/__init__.cpython-38.pyc +0 -0
  16. diffusion_webui/diffusion_models/stable_diffusion/__pycache__/img2img_app.cpython-38.pyc +0 -0
  17. diffusion_webui/diffusion_models/stable_diffusion/__pycache__/inpaint_app.cpython-38.pyc +0 -0
  18. diffusion_webui/diffusion_models/stable_diffusion/__pycache__/text2img_app.cpython-38.pyc +0 -0
  19. diffusion_webui/diffusion_models/stable_diffusion/img2img_app.py +153 -0
  20. diffusion_webui/diffusion_models/stable_diffusion/inpaint_app.py +148 -0
  21. diffusion_webui/diffusion_models/stable_diffusion/text2img_app.py +170 -0
  22. diffusion_webui/helpers.py +33 -0
  23. diffusion_webui/utils/__init__.py +0 -0
  24. diffusion_webui/utils/model_list.py +33 -0
  25. diffusion_webui/utils/scheduler_list.py +47 -0
diffusion_webui/__init__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "1.4.1"
diffusion_webui/diffusion_models/__init__.py ADDED
File without changes
diffusion_webui/diffusion_models/controlnet/__init__.py ADDED
File without changes
diffusion_webui/diffusion_models/controlnet/controlnet_canny.py ADDED
@@ -0,0 +1,183 @@
1
+ import cv2
2
+ import gradio as gr
3
+ import numpy as np
4
+ import torch
5
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
6
+ from PIL import Image
7
+
8
+ from diffusion_webui.utils.model_list import (
9
+ controlnet_canny_model_list,
10
+ stable_model_list,
11
+ )
12
+ from diffusion_webui.utils.scheduler_list import (
13
+ SCHEDULER_LIST,
14
+ get_scheduler_list,
15
+ )
16
+
17
+
18
+ class StableDiffusionControlNetCannyGenerator:
19
+ def __init__(self):
20
+ self.pipe = None
21
+
22
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
23
+ if self.pipe is None:
24
+ controlnet = ControlNetModel.from_pretrained(
25
+ controlnet_model_path, torch_dtype=torch.float16
26
+ )
27
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
28
+ pretrained_model_name_or_path=stable_model_path,
29
+ controlnet=controlnet,
30
+ safety_checker=None,
31
+ torch_dtype=torch.float16,
32
+ )
33
+
34
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
35
+ self.pipe.to("cuda")
36
+ self.pipe.enable_xformers_memory_efficient_attention()
37
+
38
+ return self.pipe
39
+
40
+ def controlnet_canny(
41
+ self,
42
+ image_path: str,
43
+ ):
44
+ image = Image.open(image_path)
45
+ image = np.array(image)
46
+
47
+ image = cv2.Canny(image, 100, 200)
48
+ image = image[:, :, None]
49
+ image = np.concatenate([image, image, image], axis=2)
50
+ image = Image.fromarray(image)
51
+
52
+ return image
53
+
54
+ def generate_image(
55
+ self,
56
+ image_path: str,
57
+ stable_model_path: str,
58
+ controlnet_model_path: str,
59
+ prompt: str,
60
+ negative_prompt: str,
61
+ num_images_per_prompt: int,
62
+ guidance_scale: int,
63
+ num_inference_step: int,
64
+ scheduler: str,
65
+ seed_generator: int,
66
+ ):
67
+ pipe = self.load_model(
68
+ stable_model_path=stable_model_path,
69
+ controlnet_model_path=controlnet_model_path,
70
+ scheduler=scheduler,
71
+ )
72
+
73
+ image = self.controlnet_canny(image_path=image_path)
74
+
75
+ if seed_generator == 0:
76
+ random_seed = torch.randint(0, 1000000, (1,))
77
+ generator = torch.manual_seed(random_seed)
78
+ else:
79
+ generator = torch.manual_seed(seed_generator)
80
+
81
+ output = pipe(
82
+ prompt=prompt,
83
+ image=image,
84
+ negative_prompt=negative_prompt,
85
+ num_images_per_prompt=num_images_per_prompt,
86
+ num_inference_steps=num_inference_step,
87
+ guidance_scale=guidance_scale,
88
+ generator=generator,
89
+ ).images
90
+
91
+ return output
92
+
93
+ def app():
94
+ with gr.Blocks():
95
+ with gr.Row():
96
+ with gr.Column():
97
+ controlnet_canny_image_file = gr.Image(
98
+ type="filepath", label="Image"
99
+ )
100
+
101
+ controlnet_canny_prompt = gr.Textbox(
102
+ lines=1,
103
+ placeholder="Prompt",
104
+ show_label=False,
105
+ )
106
+
107
+ controlnet_canny_negative_prompt = gr.Textbox(
108
+ lines=1,
109
+ placeholder="Negative Prompt",
110
+ show_label=False,
111
+ )
112
+ with gr.Row():
113
+ with gr.Column():
114
+ controlnet_canny_stable_model_id = gr.Dropdown(
115
+ choices=stable_model_list,
116
+ value=stable_model_list[0],
117
+ label="Stable Model Id",
118
+ )
119
+
120
+ controlnet_canny_guidance_scale = gr.Slider(
121
+ minimum=0.1,
122
+ maximum=15,
123
+ step=0.1,
124
+ value=7.5,
125
+ label="Guidance Scale",
126
+ )
127
+ controlnet_canny_num_inference_step = gr.Slider(
128
+ minimum=1,
129
+ maximum=100,
130
+ step=1,
131
+ value=50,
132
+ label="Num Inference Step",
133
+ )
134
+ controlnet_canny_num_images_per_prompt = gr.Slider(
135
+ minimum=1,
136
+ maximum=10,
137
+ step=1,
138
+ value=1,
139
+ label="Number Of Images",
140
+ )
141
+ with gr.Row():
142
+ with gr.Column():
143
+ controlnet_canny_model_id = gr.Dropdown(
144
+ choices=controlnet_canny_model_list,
145
+ value=controlnet_canny_model_list[0],
146
+ label="ControlNet Model Id",
147
+ )
148
+
149
+ controlnet_canny_scheduler = gr.Dropdown(
150
+ choices=SCHEDULER_LIST,
151
+ value=SCHEDULER_LIST[0],
152
+ label="Scheduler",
153
+ )
154
+
155
+ controlnet_canny_seed_generator = gr.Number(
156
+ value=0,
157
+ label="Seed Generator",
158
+ )
159
+ controlnet_canny_predict = gr.Button(value="Generator")
160
+
161
+ with gr.Column():
162
+ output_image = gr.Gallery(
163
+ label="Generated images",
164
+ show_label=False,
165
+ elem_id="gallery",
166
+ ).style(grid=(1, 2))
167
+
168
+ controlnet_canny_predict.click(
169
+ fn=StableDiffusionControlNetCannyGenerator().generate_image,
170
+ inputs=[
171
+ controlnet_canny_image_file,
172
+ controlnet_canny_stable_model_id,
173
+ controlnet_canny_model_id,
174
+ controlnet_canny_prompt,
175
+ controlnet_canny_negative_prompt,
176
+ controlnet_canny_num_images_per_prompt,
177
+ controlnet_canny_guidance_scale,
178
+ controlnet_canny_num_inference_step,
179
+ controlnet_canny_scheduler,
180
+ controlnet_canny_seed_generator,
181
+ ],
182
+ outputs=[output_image],
183
+ )
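The Canny module above follows the same load_model / generate_image pattern used by the other ControlNet modules in this commit. A minimal usage sketch (hypothetical file path and model IDs; assumes a CUDA device, the dependencies imported above, and a scheduler name that get_scheduler_list accepts):

from diffusion_webui.diffusion_models.controlnet.controlnet_canny import (
    StableDiffusionControlNetCannyGenerator,
)

generator = StableDiffusionControlNetCannyGenerator()
images = generator.generate_image(
    image_path="input.png",  # placeholder: any RGB image on disk
    stable_model_path="runwayml/stable-diffusion-v1-5",  # placeholder model ID
    controlnet_model_path="lllyasviel/sd-controlnet-canny",  # placeholder model ID
    prompt="a futuristic cityscape",
    negative_prompt="low quality",
    num_images_per_prompt=1,
    guidance_scale=7.5,
    num_inference_step=50,
    scheduler="DDIM",  # assumption: must be an entry of SCHEDULER_LIST
    seed_generator=0,  # 0 draws a random seed inside generate_image
)
images[0].save("output.png")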
diffusion_webui/diffusion_models/controlnet/controlnet_depth.py ADDED
@@ -0,0 +1,187 @@
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
5
+ from PIL import Image
6
+ from transformers import pipeline
7
+
8
+ from diffusion_webui.utils.model_list import (
9
+ controlnet_depth_model_list,
10
+ stable_model_list,
11
+ )
12
+ from diffusion_webui.utils.scheduler_list import (
13
+ SCHEDULER_LIST,
14
+ get_scheduler_list,
15
+ )
16
+
17
+
18
+ class StableDiffusionControlNetDepthGenerator:
19
+ def __init__(self):
20
+ self.pipe = None
21
+
22
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
23
+ if self.pipe is None:
24
+ controlnet = ControlNetModel.from_pretrained(
25
+ controlnet_model_path, torch_dtype=torch.float16
26
+ )
27
+
28
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
29
+ pretrained_model_name_or_path=stable_model_path,
30
+ controlnet=controlnet,
31
+ safety_checker=None,
32
+ torch_dtype=torch.float16,
33
+ )
34
+
35
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
36
+ self.pipe.to("cuda")
37
+ self.pipe.enable_xformers_memory_efficient_attention()
38
+
39
+ return self.pipe
40
+
41
+ def controlnet_depth(self, image_path: str):
42
+ depth_estimator = pipeline("depth-estimation")
43
+ image = Image.open(image_path)
44
+ image = depth_estimator(image)["depth"]
45
+ image = np.array(image)
46
+ image = image[:, :, None]
47
+ image = np.concatenate([image, image, image], axis=2)
48
+ image = Image.fromarray(image)
49
+
50
+ return image
51
+
52
+ def generate_image(
53
+ self,
54
+ image_path: str,
55
+ stable_model_path: str,
56
+ depth_model_path: str,
57
+ prompt: str,
58
+ negative_prompt: str,
59
+ num_images_per_prompt: int,
60
+ guidance_scale: int,
61
+ num_inference_step: int,
62
+ scheduler: str,
63
+ seed_generator: int,
64
+ ):
65
+ image = self.controlnet_depth(image_path)
66
+
67
+ pipe = self.load_model(
68
+ stable_model_path=stable_model_path,
69
+ controlnet_model_path=depth_model_path,
70
+ scheduler=scheduler,
71
+ )
72
+
73
+ if seed_generator == 0:
74
+ random_seed = torch.randint(0, 1000000, (1,))
75
+ generator = torch.manual_seed(random_seed)
76
+ else:
77
+ generator = torch.manual_seed(seed_generator)
78
+
79
+ output = pipe(
80
+ prompt=prompt,
81
+ image=image,
82
+ negative_prompt=negative_prompt,
83
+ num_images_per_prompt=num_images_per_prompt,
84
+ num_inference_steps=num_inference_step,
85
+ guidance_scale=guidance_scale,
86
+ generator=generator,
87
+ ).images
88
+
89
+ return output
90
+
91
+ def app():
92
+ with gr.Blocks():
93
+ with gr.Row():
94
+ with gr.Column():
95
+ controlnet_depth_image_file = gr.Image(
96
+ type="filepath", label="Image"
97
+ )
98
+
99
+ controlnet_depth_prompt = gr.Textbox(
100
+ lines=1,
101
+ show_label=False,
102
+ placeholder="Prompt",
103
+ )
104
+
105
+ controlnet_depth_negative_prompt = gr.Textbox(
106
+ lines=1,
107
+ show_label=False,
108
+ placeholder="Negative Prompt",
109
+ )
110
+
111
+ with gr.Row():
112
+ with gr.Column():
113
+ controlnet_depth_stable_model_id = gr.Dropdown(
114
+ choices=stable_model_list,
115
+ value=stable_model_list[0],
116
+ label="Stable Model Id",
117
+ )
118
+ controlnet_depth_guidance_scale = gr.Slider(
119
+ minimum=0.1,
120
+ maximum=15,
121
+ step=0.1,
122
+ value=7.5,
123
+ label="Guidance Scale",
124
+ )
125
+
126
+ controlnet_depth_num_inference_step = gr.Slider(
127
+ minimum=1,
128
+ maximum=100,
129
+ step=1,
130
+ value=50,
131
+ label="Num Inference Step",
132
+ )
133
+
134
+ controlnet_depth_num_images_per_prompt = gr.Slider(
135
+ minimum=1,
136
+ maximum=10,
137
+ step=1,
138
+ value=1,
139
+ label="Number Of Images",
140
+ )
141
+ with gr.Row():
142
+ with gr.Column():
143
+ controlnet_depth_model_id = gr.Dropdown(
144
+ choices=controlnet_depth_model_list,
145
+ value=controlnet_depth_model_list[0],
146
+ label="ControlNet Model Id",
147
+ )
148
+
149
+ controlnet_depth_scheduler = gr.Dropdown(
150
+ choices=SCHEDULER_LIST,
151
+ value=SCHEDULER_LIST[0],
152
+ label="Scheduler",
153
+ )
154
+
155
+ controlnet_depth_seed_generator = gr.Number(
156
+ minimum=0,
157
+ maximum=1000000,
158
+ step=1,
159
+ value=0,
160
+ label="Seed Generator",
161
+ )
162
+
163
+ controlnet_depth_predict = gr.Button(value="Generator")
164
+
165
+ with gr.Column():
166
+ output_image = gr.Gallery(
167
+ label="Generated images",
168
+ show_label=False,
169
+ elem_id="gallery",
170
+ ).style(grid=(1, 2))
171
+
172
+ controlnet_depth_predict.click(
173
+ fn=StableDiffusionControlNetDepthGenerator().generate_image,
174
+ inputs=[
175
+ controlnet_depth_image_file,
176
+ controlnet_depth_stable_model_id,
177
+ controlnet_depth_model_id,
178
+ controlnet_depth_prompt,
179
+ controlnet_depth_negative_prompt,
180
+ controlnet_depth_num_images_per_prompt,
181
+ controlnet_depth_guidance_scale,
182
+ controlnet_depth_num_inference_step,
183
+ controlnet_depth_scheduler,
184
+ controlnet_depth_seed_generator,
185
+ ],
186
+ outputs=output_image,
187
+ )
diffusion_webui/diffusion_models/controlnet/controlnet_hed.py ADDED
@@ -0,0 +1,181 @@
1
+ import gradio as gr
2
+ import torch
3
+ from controlnet_aux import HEDdetector
4
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
5
+ from PIL import Image
6
+
7
+ from diffusion_webui.utils.model_list import (
8
+ controlnet_hed_model_list,
9
+ stable_model_list,
10
+ )
11
+ from diffusion_webui.utils.scheduler_list import (
12
+ SCHEDULER_LIST,
13
+ get_scheduler_list,
14
+ )
15
+
16
+
17
+ class StableDiffusionControlNetHEDGenerator:
18
+ def __init__(self):
19
+ self.pipe = None
20
+
21
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
22
+ if self.pipe is None:
23
+ controlnet = ControlNetModel.from_pretrained(
24
+ controlnet_model_path, torch_dtype=torch.float16
25
+ )
26
+
27
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
28
+ pretrained_model_name_or_path=stable_model_path,
29
+ controlnet=controlnet,
30
+ safety_checker=None,
31
+ torch_dtype=torch.float16,
32
+ )
33
+
34
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
35
+ self.pipe.to("cuda")
36
+ self.pipe.enable_xformers_memory_efficient_attention()
37
+
38
+ return self.pipe
39
+
40
+ def controlnet_hed(self, image_path: str):
41
+ hed = HEDdetector.from_pretrained("lllyasviel/ControlNet")
42
+ image = Image.open(image_path)
43
+ image = hed(image)
44
+
45
+ return image
46
+
47
+ def generate_image(
48
+ self,
49
+ image_path: str,
50
+ stable_model_path: str,
51
+ controlnet_hed_model_path: str,
52
+ prompt: str,
53
+ negative_prompt: str,
54
+ num_images_per_prompt: int,
55
+ guidance_scale: int,
56
+ num_inference_step: int,
57
+ scheduler: str,
58
+ seed_generator: int,
59
+ ):
60
+
61
+ image = self.controlnet_hed(image_path=image_path)
62
+
63
+ pipe = self.load_model(
64
+ stable_model_path=stable_model_path,
65
+ controlnet_model_path=controlnet_hed_model_path,
66
+ scheduler=scheduler,
67
+ )
68
+
69
+ if seed_generator == 0:
70
+ random_seed = torch.randint(0, 1000000, (1,))
71
+ generator = torch.manual_seed(random_seed)
72
+ else:
73
+ generator = torch.manual_seed(seed_generator)
74
+
75
+ output = pipe(
76
+ prompt=prompt,
77
+ image=image,
78
+ negative_prompt=negative_prompt,
79
+ num_images_per_prompt=num_images_per_prompt,
80
+ num_inference_steps=num_inference_step,
81
+ guidance_scale=guidance_scale,
82
+ generator=generator,
83
+ ).images
84
+
85
+ return output
86
+
87
+ def app():
88
+ with gr.Blocks():
89
+ with gr.Row():
90
+ with gr.Column():
91
+ controlnet_hed_image_file = gr.Image(
92
+ type="filepath", label="Image"
93
+ )
94
+ controlnet_hed_prompt = gr.Textbox(
95
+ lines=1,
96
+ show_label=False,
97
+ placeholder="Prompt",
98
+ )
99
+
100
+ controlnet_hed_negative_prompt = gr.Textbox(
101
+ lines=1,
102
+ show_label=False,
103
+ placeholder="Negative Prompt",
104
+ )
105
+
106
+ with gr.Row():
107
+ with gr.Column():
108
+ controlnet_hed_stable_model_id = gr.Dropdown(
109
+ choices=stable_model_list,
110
+ value=stable_model_list[0],
111
+ label="Stable Model Id",
112
+ )
113
+ controlnet_hed_guidance_scale = gr.Slider(
114
+ minimum=0.1,
115
+ maximum=15,
116
+ step=0.1,
117
+ value=7.5,
118
+ label="Guidance Scale",
119
+ )
120
+ controlnet_hed_num_inference_step = gr.Slider(
121
+ minimum=1,
122
+ maximum=100,
123
+ step=1,
124
+ value=50,
125
+ label="Num Inference Step",
126
+ )
127
+
128
+ controlnet_hed_num_images_per_prompt = gr.Slider(
129
+ minimum=1,
130
+ maximum=10,
131
+ step=1,
132
+ value=1,
133
+ label="Number Of Images",
134
+ )
135
+
136
+ with gr.Row():
137
+ with gr.Column():
138
+ controlnet_hed_model_id = gr.Dropdown(
139
+ choices=controlnet_hed_model_list,
140
+ value=controlnet_hed_model_list[0],
141
+ label="ControlNet Model Id",
142
+ )
143
+ controlnet_hed_scheduler = gr.Dropdown(
144
+ choices=SCHEDULER_LIST,
145
+ value=SCHEDULER_LIST[0],
146
+ label="Scheduler",
147
+ )
148
+
149
+ controlnet_hed_seed_generator = gr.Number(
150
+ minimum=0,
151
+ maximum=1000000,
152
+ step=1,
153
+ value=0,
154
+ label="Seed Generator",
155
+ )
156
+
157
+ controlnet_hed_predict = gr.Button(value="Generator")
158
+
159
+ with gr.Column():
160
+ output_image = gr.Gallery(
161
+ label="Generated images",
162
+ show_label=False,
163
+ elem_id="gallery",
164
+ ).style(grid=(1, 2))
165
+
166
+ controlnet_hed_predict.click(
167
+ fn=StableDiffusionControlNetHEDGenerator().generate_image,
168
+ inputs=[
169
+ controlnet_hed_image_file,
170
+ controlnet_hed_stable_model_id,
171
+ controlnet_hed_model_id,
172
+ controlnet_hed_prompt,
173
+ controlnet_hed_negative_prompt,
174
+ controlnet_hed_num_images_per_prompt,
175
+ controlnet_hed_guidance_scale,
176
+ controlnet_hed_num_inference_step,
177
+ controlnet_hed_scheduler,
178
+ controlnet_hed_seed_generator,
179
+ ],
180
+ outputs=[output_image],
181
+ )
diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/__init__.py ADDED
File without changes
diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/controlnet_inpaint_app.py ADDED
@@ -0,0 +1,203 @@
1
+ import cv2
2
+ import gradio as gr
3
+ import numpy as np
4
+ import torch
5
+ from diffusers import (
6
+ ControlNetModel,
7
+ StableDiffusionControlNetPipeline,
8
+ )
9
+ from PIL import Image
10
+
11
+ from diffusion_webui.utils.model_list import (
12
+ controlnet_canny_model_list,
13
+ stable_model_list,
14
+ )
15
+ from diffusion_webui.utils.scheduler_list import (
16
+ SCHEDULER_LIST,
17
+ get_scheduler_list,
18
+ )
19
+
20
+
21
+ class StableDiffusionControlInpaintNetCannyGenerator:
22
+ def __init__(self):
23
+ self.pipe = None
24
+
25
+ def controlnet_canny_inpaint(
26
+ self,
27
+ image_path: str,
28
+ ):
29
+ image = Image.open(image_path)
30
+ image = np.array(image)
31
+
32
+ image = cv2.Canny(image, 100, 200)
33
+ image = image[:, :, None]
34
+ image = np.concatenate([image, image, image], axis=2)
35
+ image = Image.fromarray(image)
36
+
37
+ return image
38
+
39
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
40
+ if self.pipe is None:
41
+ controlnet = ControlNetModel.from_pretrained(
42
+ controlnet_model_path, torch_dtype=torch.float16
43
+ )
44
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
45
+ pretrained_model_name_or_path=stable_model_path,
46
+ controlnet=controlnet,
47
+ safety_checker=None,
48
+ torch_dtype=torch.float16,
49
+ )
50
+
51
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
52
+ self.pipe.to("cuda")
53
+ self.pipe.enable_xformers_memory_efficient_attention()
54
+
55
+ return self.pipe
56
+
57
+ def generate_image(
58
+ self,
59
+ image_path: str,
60
+ stable_model_path: str,
61
+ controlnet_model_path: str,
62
+ prompt: str,
63
+ negative_prompt: str,
64
+ num_images_per_prompt: int,
65
+ guidance_scale: int,
66
+ num_inference_step: int,
67
+ scheduler: str,
68
+ seed_generator: int,
69
+ ):
70
+
71
+ image = self.controlnet_canny_inpaint(
72
+ image_path=image_path
73
+ )
74
+
75
+ pipe = self.load_model(
76
+ stable_model_path=stable_model_path,
77
+ controlnet_model_path=controlnet_model_path,
78
+ scheduler=scheduler,
79
+ )
80
+
81
+ if seed_generator == 0:
82
+ random_seed = torch.randint(0, 1000000, (1,))
83
+ generator = torch.manual_seed(random_seed)
84
+ else:
85
+ generator = torch.manual_seed(seed_generator)
86
+
87
+ output = pipe(
88
+ prompt=prompt,
89
+ image=image,
90
+ negative_prompt=negative_prompt,
91
+ num_images_per_prompt=num_images_per_prompt,
92
+ num_inference_steps=num_inference_step,
93
+ guidance_scale=guidance_scale,
94
+ generator=generator,
95
+ ).images
96
+
97
+ return output
98
+
99
+ def app():
100
+ with gr.Blocks():
101
+ with gr.Row():
102
+ with gr.Column():
103
+ controlnet_canny_inpaint_image_file = gr.Image(
104
+ type="filepath", label="Image"
105
+ )
106
+
107
+ controlnet_canny_inpaint_prompt = gr.Textbox(
108
+ lines=1, placeholder="Prompt", show_label=False
109
+ )
110
+
111
+ controlnet_canny_inpaint_negative_prompt = gr.Textbox(
112
+ lines=1,
113
+ show_label=False,
114
+ placeholder="Negative Prompt",
115
+ )
116
+ with gr.Row():
117
+ with gr.Column():
118
+ controlnet_canny_inpaint_stable_model_id = (
119
+ gr.Dropdown(
120
+ choices=stable_model_list,
121
+ value=stable_model_list[0],
122
+ label="Stable Model Id",
123
+ )
124
+ )
125
+
126
+ controlnet_canny_inpaint_guidance_scale = gr.Slider(
127
+ minimum=0.1,
128
+ maximum=15,
129
+ step=0.1,
130
+ value=7.5,
131
+ label="Guidance Scale",
132
+ )
133
+
134
+ controlnet_canny_inpaint_num_inference_step = (
135
+ gr.Slider(
136
+ minimum=1,
137
+ maximum=100,
138
+ step=1,
139
+ value=50,
140
+ label="Num Inference Step",
141
+ )
142
+ )
143
+ controlnet_canny_inpaint_num_images_per_prompt = (
144
+ gr.Slider(
145
+ minimum=1,
146
+ maximum=10,
147
+ step=1,
148
+ value=1,
149
+ label="Number Of Images",
150
+ )
151
+ )
152
+ with gr.Row():
153
+ with gr.Column():
154
+ controlnet_canny_inpaint_model_id = gr.Dropdown(
155
+ choices=controlnet_canny_model_list,
156
+ value=controlnet_canny_model_list[0],
157
+ label="Controlnet Model Id",
158
+ )
159
+ controlnet_canny_inpaint_scheduler = (
160
+ gr.Dropdown(
161
+ choices=SCHEDULER_LIST,
162
+ value=SCHEDULER_LIST[0],
163
+ label="Scheduler",
164
+ )
165
+ )
166
+
167
+ controlnet_canny_inpaint_seed_generator = (
168
+ gr.Slider(
169
+ minimum=0,
170
+ maximum=1000000,
171
+ step=1,
172
+ value=0,
173
+ label="Seed Generator",
174
+ )
175
+ )
176
+
177
+ controlnet_canny_inpaint_predict = gr.Button(
178
+ value="Generator"
179
+ )
180
+
181
+ with gr.Column():
182
+ output_image = gr.Gallery(
183
+ label="Generated images",
184
+ show_label=False,
185
+ elem_id="gallery",
186
+ ).style(grid=(1, 2))
187
+
188
+ controlnet_canny_inpaint_predict.click(
189
+ fn=StableDiffusionControlInpaintNetCannyGenerator().generate_image,
190
+ inputs=[
191
+ controlnet_canny_inpaint_image_file,
192
+ controlnet_canny_inpaint_stable_model_id,
193
+ controlnet_canny_inpaint_model_id,
194
+ controlnet_canny_inpaint_prompt,
195
+ controlnet_canny_inpaint_negative_prompt,
196
+ controlnet_canny_inpaint_num_images_per_prompt,
197
+ controlnet_canny_inpaint_guidance_scale,
198
+ controlnet_canny_inpaint_num_inference_step,
199
+ controlnet_canny_inpaint_scheduler,
200
+ controlnet_canny_inpaint_seed_generator,
201
+ ],
202
+ outputs=[output_image],
203
+ )
diffusion_webui/diffusion_models/controlnet/controlnet_inpaint/pipeline_stable_diffusion_controlnet_inpaint.py ADDED
@@ -0,0 +1,607 @@
1
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import PIL.Image
17
+ import torch
18
+ from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import *
19
+
20
+ EXAMPLE_DOC_STRING = """
21
+ Examples:
22
+ ```py
23
+ >>> # !pip install opencv-python transformers accelerate
24
+ >>> from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel, UniPCMultistepScheduler
25
+ >>> from diffusers.utils import load_image
26
+ >>> import numpy as np
27
+ >>> import torch
28
+
29
+ >>> import cv2
30
+ >>> from PIL import Image
31
+ >>> # download an image
32
+ >>> image = load_image(
33
+ ... "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
34
+ ... )
35
+ >>> image = np.array(image)
36
+ >>> mask_image = load_image(
37
+ ... "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
38
+ ... )
39
+ >>> mask_image = np.array(mask_image)
40
+ >>> # get canny image
41
+ >>> canny_image = cv2.Canny(image, 100, 200)
42
+ >>> canny_image = canny_image[:, :, None]
43
+ >>> canny_image = np.concatenate([canny_image, canny_image, canny_image], axis=2)
44
+ >>> canny_image = Image.fromarray(canny_image)
45
+
46
+ >>> # load control net and stable diffusion v1-5
47
+ >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
48
+ >>> pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
49
+ ... "runwayml/stable-diffusion-inpainting", controlnet=controlnet, torch_dtype=torch.float16
50
+ ... )
51
+
52
+ >>> # speed up diffusion process with faster scheduler and memory optimization
53
+ >>> pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
54
+ >>> # remove following line if xformers is not installed
55
+ >>> pipe.enable_xformers_memory_efficient_attention()
56
+
57
+ >>> pipe.enable_model_cpu_offload()
58
+
59
+ >>> # generate image
60
+ >>> generator = torch.manual_seed(0)
61
+ >>> image = pipe(
62
+ ... "futuristic-looking doggo",
63
+ ... num_inference_steps=20,
64
+ ... generator=generator,
65
+ ... image=image,
66
+ ... control_image=canny_image,
67
+ ... mask_image=mask_image
68
+ ... ).images[0]
69
+ ```
70
+ """
71
+
72
+
73
+ def prepare_mask_and_masked_image(image, mask):
74
+ """
75
+ Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be
76
+ converted to ``torch.Tensor`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the
77
+ ``image`` and ``1`` for the ``mask``.
78
+ The ``image`` will be converted to ``torch.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be
79
+ binarized (``mask > 0.5``) and cast to ``torch.float32`` too.
80
+ Args:
81
+ image (Union[np.array, PIL.Image, torch.Tensor]): The image to inpaint.
82
+ It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` or a ``channels x height x width``
83
+ ``torch.Tensor`` or a ``batch x channels x height x width`` ``torch.Tensor``.
84
+ mask (_type_): The mask to apply to the image, i.e. regions to inpaint.
85
+ It can be a ``PIL.Image``, or a ``height x width`` ``np.array`` or a ``1 x height x width``
86
+ ``torch.Tensor`` or a ``batch x 1 x height x width`` ``torch.Tensor``.
87
+ Raises:
88
+ ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
89
+ should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
90
+ TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
91
+ (or the other way around).
92
+ Returns:
93
+ tuple[torch.Tensor]: The pair (mask, masked_image) as ``torch.Tensor`` with 4
94
+ dimensions: ``batch x channels x height x width``.
95
+ """
96
+ if isinstance(image, torch.Tensor):
97
+ if not isinstance(mask, torch.Tensor):
98
+ raise TypeError(
99
+ f"`image` is a torch.Tensor but `mask` (type: {type(mask)} is not"
100
+ )
101
+
102
+ # Batch single image
103
+ if image.ndim == 3:
104
+ assert (
105
+ image.shape[0] == 3
106
+ ), "Image outside a batch should be of shape (3, H, W)"
107
+ image = image.unsqueeze(0)
108
+
109
+ # Batch and add channel dim for single mask
110
+ if mask.ndim == 2:
111
+ mask = mask.unsqueeze(0).unsqueeze(0)
112
+
113
+ # Batch single mask or add channel dim
114
+ if mask.ndim == 3:
115
+ # Single batched mask, no channel dim or single mask not batched but channel dim
116
+ if mask.shape[0] == 1:
117
+ mask = mask.unsqueeze(0)
118
+
119
+ # Batched masks no channel dim
120
+ else:
121
+ mask = mask.unsqueeze(1)
122
+
123
+ assert (
124
+ image.ndim == 4 and mask.ndim == 4
125
+ ), "Image and Mask must have 4 dimensions"
126
+ assert (
127
+ image.shape[-2:] == mask.shape[-2:]
128
+ ), "Image and Mask must have the same spatial dimensions"
129
+ assert (
130
+ image.shape[0] == mask.shape[0]
131
+ ), "Image and Mask must have the same batch size"
132
+
133
+ # Check image is in [-1, 1]
134
+ if image.min() < -1 or image.max() > 1:
135
+ raise ValueError("Image should be in [-1, 1] range")
136
+
137
+ # Check mask is in [0, 1]
138
+ if mask.min() < 0 or mask.max() > 1:
139
+ raise ValueError("Mask should be in [0, 1] range")
140
+
141
+ # Binarize mask
142
+ mask[mask < 0.5] = 0
143
+ mask[mask >= 0.5] = 1
144
+
145
+ # Image as float32
146
+ image = image.to(dtype=torch.float32)
147
+ elif isinstance(mask, torch.Tensor):
148
+ raise TypeError(
149
+ f"`mask` is a torch.Tensor but `image` (type: {type(image)} is not"
150
+ )
151
+ else:
152
+ # preprocess image
153
+ if isinstance(image, (PIL.Image.Image, np.ndarray)):
154
+ image = [image]
155
+
156
+ if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
157
+ image = [np.array(i.convert("RGB"))[None, :] for i in image]
158
+ image = np.concatenate(image, axis=0)
159
+ elif isinstance(image, list) and isinstance(image[0], np.ndarray):
160
+ image = np.concatenate([i[None, :] for i in image], axis=0)
161
+
162
+ image = image.transpose(0, 3, 1, 2)
163
+ image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
164
+
165
+ # preprocess mask
166
+ if isinstance(mask, (PIL.Image.Image, np.ndarray)):
167
+ mask = [mask]
168
+
169
+ if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image):
170
+ mask = np.concatenate(
171
+ [np.array(m.convert("L"))[None, None, :] for m in mask], axis=0
172
+ )
173
+ mask = mask.astype(np.float32) / 255.0
174
+ elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
175
+ mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
176
+
177
+ mask[mask < 0.5] = 0
178
+ mask[mask >= 0.5] = 1
179
+ mask = torch.from_numpy(mask)
180
+
181
+ masked_image = image * (mask < 0.5)
182
+
183
+ return mask, masked_image
184
+
185
+
186
+ class StableDiffusionControlNetInpaintPipeline(
187
+ StableDiffusionControlNetPipeline
188
+ ):
189
+ r"""
190
+ Pipeline for text-guided image inpainting using Stable Diffusion with ControlNet guidance.
191
+
192
+ This model inherits from [`StableDiffusionControlNetPipeline`]. Check the superclass documentation for the generic methods the
193
+ library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
194
+
195
+ Args:
196
+ vae ([`AutoencoderKL`]):
197
+ Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
198
+ text_encoder ([`CLIPTextModel`]):
199
+ Frozen text-encoder. Stable Diffusion uses the text portion of
200
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
201
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
202
+ tokenizer (`CLIPTokenizer`):
203
+ Tokenizer of class
204
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
205
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
206
+ controlnet ([`ControlNetModel`]):
207
+ Provides additional conditioning to the unet during the denoising process
208
+ scheduler ([`SchedulerMixin`]):
209
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
210
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
211
+ safety_checker ([`StableDiffusionSafetyChecker`]):
212
+ Classification module that estimates whether generated images could be considered offensive or harmful.
213
+ Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
214
+ feature_extractor ([`CLIPFeatureExtractor`]):
215
+ Model that extracts features from generated images to be used as inputs for the `safety_checker`.
216
+ """
217
+
218
+ def prepare_mask_latents(
219
+ self,
220
+ mask,
221
+ masked_image,
222
+ batch_size,
223
+ height,
224
+ width,
225
+ dtype,
226
+ device,
227
+ generator,
228
+ do_classifier_free_guidance,
229
+ ):
230
+ # resize the mask to latents shape as we concatenate the mask to the latents
231
+ # we do that before converting to dtype to avoid breaking in case we're using cpu_offload
232
+ # and half precision
233
+ mask = torch.nn.functional.interpolate(
234
+ mask,
235
+ size=(
236
+ height // self.vae_scale_factor,
237
+ width // self.vae_scale_factor,
238
+ ),
239
+ )
240
+ mask = mask.to(device=device, dtype=dtype)
241
+
242
+ masked_image = masked_image.to(device=device, dtype=dtype)
243
+
244
+ # encode the mask image into latents space so we can concatenate it to the latents
245
+ if isinstance(generator, list):
246
+ masked_image_latents = [
247
+ self.vae.encode(masked_image[i : i + 1]).latent_dist.sample(
248
+ generator=generator[i]
249
+ )
250
+ for i in range(batch_size)
251
+ ]
252
+ masked_image_latents = torch.cat(masked_image_latents, dim=0)
253
+ else:
254
+ masked_image_latents = self.vae.encode(
255
+ masked_image
256
+ ).latent_dist.sample(generator=generator)
257
+ masked_image_latents = (
258
+ self.vae.config.scaling_factor * masked_image_latents
259
+ )
260
+
261
+ # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
262
+ if mask.shape[0] < batch_size:
263
+ if not batch_size % mask.shape[0] == 0:
264
+ raise ValueError(
265
+ "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
266
+ f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
267
+ " of masks that you pass is divisible by the total requested batch size."
268
+ )
269
+ mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
270
+ if masked_image_latents.shape[0] < batch_size:
271
+ if not batch_size % masked_image_latents.shape[0] == 0:
272
+ raise ValueError(
273
+ "The passed images and the required batch size don't match. Images are supposed to be duplicated"
274
+ f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
275
+ " Make sure the number of images that you pass is divisible by the total requested batch size."
276
+ )
277
+ masked_image_latents = masked_image_latents.repeat(
278
+ batch_size // masked_image_latents.shape[0], 1, 1, 1
279
+ )
280
+
281
+ mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask
282
+ masked_image_latents = (
283
+ torch.cat([masked_image_latents] * 2)
284
+ if do_classifier_free_guidance
285
+ else masked_image_latents
286
+ )
287
+
288
+ # aligning device to prevent device errors when concating it with the latent model input
289
+ masked_image_latents = masked_image_latents.to(
290
+ device=device, dtype=dtype
291
+ )
292
+ return mask, masked_image_latents
293
+
294
+ @torch.no_grad()
295
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
296
+ def __call__(
297
+ self,
298
+ prompt: Union[str, List[str]] = None,
299
+ image: Union[torch.FloatTensor, PIL.Image.Image] = None,
300
+ control_image: Union[
301
+ torch.FloatTensor,
302
+ PIL.Image.Image,
303
+ List[torch.FloatTensor],
304
+ List[PIL.Image.Image],
305
+ ] = None,
306
+ mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
307
+ height: Optional[int] = None,
308
+ width: Optional[int] = None,
309
+ num_inference_steps: int = 50,
310
+ guidance_scale: float = 7.5,
311
+ negative_prompt: Optional[Union[str, List[str]]] = None,
312
+ num_images_per_prompt: Optional[int] = 1,
313
+ eta: float = 0.0,
314
+ generator: Optional[
315
+ Union[torch.Generator, List[torch.Generator]]
316
+ ] = None,
317
+ latents: Optional[torch.FloatTensor] = None,
318
+ prompt_embeds: Optional[torch.FloatTensor] = None,
319
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
320
+ output_type: Optional[str] = "pil",
321
+ return_dict: bool = True,
322
+ callback: Optional[
323
+ Callable[[int, int, torch.FloatTensor], None]
324
+ ] = None,
325
+ callback_steps: int = 1,
326
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
327
+ controlnet_conditioning_scale: float = 1.0,
328
+ ):
329
+ r"""
330
+ Function invoked when calling the pipeline for generation.
331
+ Args:
332
+ prompt (`str` or `List[str]`, *optional*):
333
+ The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
334
+ instead.
335
+ image (`PIL.Image.Image`):
336
+ `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will
337
+ be masked out with `mask_image` and repainted according to `prompt`.
338
+ control_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]` or `List[PIL.Image.Image]`):
339
+ The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
340
+ the type is specified as `torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
341
+ also be accepted as an image. The control image is automatically resized to fit the output image.
342
+ mask_image (`PIL.Image.Image`):
343
+ `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
344
+ repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
345
+ to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
346
+ instead of 3, so the expected shape would be `(B, H, W, 1)`.
347
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
348
+ The height in pixels of the generated image.
349
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
350
+ The width in pixels of the generated image.
351
+ num_inference_steps (`int`, *optional*, defaults to 50):
352
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
353
+ expense of slower inference.
354
+ guidance_scale (`float`, *optional*, defaults to 7.5):
355
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
356
+ `guidance_scale` is defined as `w` of equation 2. of [Imagen
357
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
358
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
359
+ usually at the expense of lower image quality.
360
+ negative_prompt (`str` or `List[str]`, *optional*):
361
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
362
+ `negative_prompt_embeds` instead.
363
+ Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
364
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
365
+ The number of images to generate per prompt.
366
+ eta (`float`, *optional*, defaults to 0.0):
367
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
368
+ [`schedulers.DDIMScheduler`], will be ignored for others.
369
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
370
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
371
+ to make generation deterministic.
372
+ latents (`torch.FloatTensor`, *optional*):
373
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
374
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
375
+ tensor will be generated by sampling using the supplied random `generator`.
376
+ prompt_embeds (`torch.FloatTensor`, *optional*):
377
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
378
+ provided, text embeddings will be generated from `prompt` input argument.
379
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
380
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
381
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
382
+ argument.
383
+ output_type (`str`, *optional*, defaults to `"pil"`):
384
+ The output format of the generate image. Choose between
385
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
386
+ return_dict (`bool`, *optional*, defaults to `True`):
387
+ Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
388
+ plain tuple.
389
+ callback (`Callable`, *optional*):
390
+ A function that will be called every `callback_steps` steps during inference. The function will be
391
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
392
+ callback_steps (`int`, *optional*, defaults to 1):
393
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
394
+ called at every step.
395
+ cross_attention_kwargs (`dict`, *optional*):
396
+ A kwargs dictionary that if specified is passed along to the `AttnProcessor` as defined under
397
+ `self.processor` in
398
+ [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
399
+ controlnet_conditioning_scale (`float`, *optional*, defaults to 1.0):
400
+ The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
401
+ to the residual in the original unet.
402
+ Examples:
403
+ Returns:
404
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
405
+ [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple`.
406
+ When returning a tuple, the first element is a list with the generated images, and the second element is a
407
+ list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
408
+ (nsfw) content, according to the `safety_checker`.
409
+ """
410
+ # 0. Default height and width to unet
411
+ height, width = self._default_height_width(height, width, control_image)
412
+
413
+ # 1. Check inputs. Raise error if not correct
414
+ self.check_inputs(
415
+ prompt,
416
+ control_image,
417
+ height,
418
+ width,
419
+ callback_steps,
420
+ negative_prompt,
421
+ prompt_embeds,
422
+ negative_prompt_embeds,
423
+ )
424
+
425
+ # 2. Define call parameters
426
+ if prompt is not None and isinstance(prompt, str):
427
+ batch_size = 1
428
+ elif prompt is not None and isinstance(prompt, list):
429
+ batch_size = len(prompt)
430
+ else:
431
+ batch_size = prompt_embeds.shape[0]
432
+
433
+ device = self._execution_device
434
+ # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
435
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
436
+ # corresponds to doing no classifier free guidance.
437
+ do_classifier_free_guidance = guidance_scale > 1.0
438
+
439
+ # 3. Encode input prompt
440
+ prompt_embeds = self._encode_prompt(
441
+ prompt,
442
+ device,
443
+ num_images_per_prompt,
444
+ do_classifier_free_guidance,
445
+ negative_prompt,
446
+ prompt_embeds=prompt_embeds,
447
+ negative_prompt_embeds=negative_prompt_embeds,
448
+ )
449
+
450
+ # 4. Prepare image
451
+ control_image = self.prepare_image(
452
+ control_image,
453
+ width,
454
+ height,
455
+ batch_size * num_images_per_prompt,
456
+ num_images_per_prompt,
457
+ device,
458
+ self.controlnet.dtype,
459
+ )
460
+
461
+ if do_classifier_free_guidance:
462
+ control_image = torch.cat([control_image] * 2)
463
+
464
+ # 5. Prepare timesteps
465
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
466
+ timesteps = self.scheduler.timesteps
467
+
468
+ # 6. Prepare latent variables
469
+ num_channels_latents = self.controlnet.in_channels
470
+ latents = self.prepare_latents(
471
+ batch_size * num_images_per_prompt,
472
+ num_channels_latents,
473
+ height,
474
+ width,
475
+ prompt_embeds.dtype,
476
+ device,
477
+ generator,
478
+ latents,
479
+ )
480
+
481
+ # EXTRA: prepare mask latents
482
+ mask, masked_image = prepare_mask_and_masked_image(image, mask_image)
483
+ mask, masked_image_latents = self.prepare_mask_latents(
484
+ mask,
485
+ masked_image,
486
+ batch_size * num_images_per_prompt,
487
+ height,
488
+ width,
489
+ prompt_embeds.dtype,
490
+ device,
491
+ generator,
492
+ do_classifier_free_guidance,
493
+ )
494
+
495
+ # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
496
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
497
+
498
+ # 8. Denoising loop
499
+ num_warmup_steps = (
500
+ len(timesteps) - num_inference_steps * self.scheduler.order
501
+ )
502
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
503
+ for i, t in enumerate(timesteps):
504
+ # expand the latents if we are doing classifier free guidance
505
+ latent_model_input = (
506
+ torch.cat([latents] * 2)
507
+ if do_classifier_free_guidance
508
+ else latents
509
+ )
510
+ latent_model_input = self.scheduler.scale_model_input(
511
+ latent_model_input, t
512
+ )
513
+
514
+ down_block_res_samples, mid_block_res_sample = self.controlnet(
515
+ latent_model_input,
516
+ t,
517
+ encoder_hidden_states=prompt_embeds,
518
+ controlnet_cond=control_image,
519
+ return_dict=False,
520
+ )
521
+
522
+ down_block_res_samples = [
523
+ down_block_res_sample * controlnet_conditioning_scale
524
+ for down_block_res_sample in down_block_res_samples
525
+ ]
526
+ mid_block_res_sample *= controlnet_conditioning_scale
527
+
528
+ # predict the noise residual
529
+ latent_model_input = torch.cat(
530
+ [latent_model_input, mask, masked_image_latents], dim=1
531
+ )
532
+ noise_pred = self.unet(
533
+ latent_model_input,
534
+ t,
535
+ encoder_hidden_states=prompt_embeds,
536
+ cross_attention_kwargs=cross_attention_kwargs,
537
+ down_block_additional_residuals=down_block_res_samples,
538
+ mid_block_additional_residual=mid_block_res_sample,
539
+ ).sample
540
+
541
+ # perform guidance
542
+ if do_classifier_free_guidance:
543
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
544
+ noise_pred = noise_pred_uncond + guidance_scale * (
545
+ noise_pred_text - noise_pred_uncond
546
+ )
547
+
548
+ # compute the previous noisy sample x_t -> x_t-1
549
+ latents = self.scheduler.step(
550
+ noise_pred, t, latents, **extra_step_kwargs
551
+ ).prev_sample
552
+
553
+ # call the callback, if provided
554
+ if i == len(timesteps) - 1 or (
555
+ (i + 1) > num_warmup_steps
556
+ and (i + 1) % self.scheduler.order == 0
557
+ ):
558
+ progress_bar.update()
559
+ if callback is not None and i % callback_steps == 0:
560
+ callback(i, t, latents)
561
+
562
+ # If we do sequential model offloading, let's offload unet and controlnet
563
+ # manually for max memory savings
564
+ if (
565
+ hasattr(self, "final_offload_hook")
566
+ and self.final_offload_hook is not None
567
+ ):
568
+ self.unet.to("cpu")
569
+ self.controlnet.to("cpu")
570
+ torch.cuda.empty_cache()
571
+
572
+ if output_type == "latent":
573
+ image = latents
574
+ has_nsfw_concept = None
575
+ elif output_type == "pil":
576
+ # 8. Post-processing
577
+ image = self.decode_latents(latents)
578
+
579
+ # 9. Run safety checker
580
+ image, has_nsfw_concept = self.run_safety_checker(
581
+ image, device, prompt_embeds.dtype
582
+ )
583
+
584
+ # 10. Convert to PIL
585
+ image = self.numpy_to_pil(image)
586
+ else:
587
+ # 8. Post-processing
588
+ image = self.decode_latents(latents)
589
+
590
+ # 9. Run safety checker
591
+ image, has_nsfw_concept = self.run_safety_checker(
592
+ image, device, prompt_embeds.dtype
593
+ )
594
+
595
+ # Offload last model to CPU
596
+ if (
597
+ hasattr(self, "final_offload_hook")
598
+ and self.final_offload_hook is not None
599
+ ):
600
+ self.final_offload_hook.offload()
601
+
602
+ if not return_dict:
603
+ return (image, has_nsfw_concept)
604
+
605
+ return StableDiffusionPipelineOutput(
606
+ images=image, nsfw_content_detected=has_nsfw_concept
607
+ )
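The prepare_mask_and_masked_image helper above normalizes the image to [-1, 1], binarizes the mask at 0.5, and zeroes out the region to be inpainted. A small illustrative sketch of what it returns (hypothetical 64x64 inputs):

import numpy as np
from PIL import Image

from diffusion_webui.diffusion_models.controlnet.controlnet_inpaint.pipeline_stable_diffusion_controlnet_inpaint import (
    prepare_mask_and_masked_image,
)

# White 64x64 image; the mask covers the right half.
image = Image.fromarray(np.full((64, 64, 3), 255, dtype=np.uint8))
mask = np.zeros((64, 64), dtype=np.uint8)
mask[:, 32:] = 255
mask = Image.fromarray(mask)

mask_t, masked_image_t = prepare_mask_and_masked_image(image, mask)
print(mask_t.shape)          # torch.Size([1, 1, 64, 64]), values in {0., 1.}
print(masked_image_t.shape)  # torch.Size([1, 3, 64, 64]), values in [-1, 1]
print(masked_image_t[..., 32:].abs().max())  # tensor(0.): masked pixels are zeroed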
diffusion_webui/diffusion_models/controlnet/controlnet_mlsd.py ADDED
@@ -0,0 +1,173 @@
1
+ import gradio as gr
2
+ import torch
3
+ from controlnet_aux import MLSDdetector
4
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
5
+ from PIL import Image
6
+
7
+ from diffusion_webui.utils.model_list import stable_model_list
8
+ from diffusion_webui.utils.scheduler_list import (
9
+ SCHEDULER_LIST,
10
+ get_scheduler_list,
11
+ )
12
+
13
+
14
+ class StableDiffusionControlNetMLSDGenerator:
15
+ def __init__(self):
16
+ self.pipe = None
17
+
18
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
19
+ if self.pipe is None:
20
+ controlnet = ControlNetModel.from_pretrained(
21
+ controlnet_model_path, torch_dtype=torch.float16
22
+ )
23
+
24
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
25
+ pretrained_model_name_or_path=stable_model_path,
26
+ controlnet=controlnet,
27
+ safety_checker=None,
28
+ torch_dtype=torch.float16,
29
+ )
30
+
31
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
32
+ self.pipe.to("cuda")
33
+ self.pipe.enable_xformers_memory_efficient_attention()
34
+
35
+ return self.pipe
36
+
37
+ def controlnet_mlsd(self, image_path: str):
38
+ mlsd = MLSDdetector.from_pretrained("lllyasviel/ControlNet")
39
+
40
+ image = Image.open(image_path)
41
+ image = mlsd(image)
42
+
43
+ return image
44
+
45
+ def generate_image(
46
+ self,
47
+ image_path: str,
48
+ model_path: str,
49
+ prompt: str,
50
+ negative_prompt: str,
51
+ num_images_per_prompt: int,
52
+ guidance_scale: int,
53
+ num_inference_step: int,
54
+ scheduler: str,
55
+ seed_generator: int,
56
+ ):
57
+ image = self.controlnet_mlsd(image_path=image_path)
58
+
59
+ pipe = self.load_model(
60
+ stable_model_path=model_path,
61
+ controlnet_model_path="lllyasviel/sd-controlnet-mlsd",
62
+ scheduler=scheduler,
63
+ )
64
+
65
+ if seed_generator == 0:
66
+ random_seed = torch.randint(0, 1000000, (1,))
67
+ generator = torch.manual_seed(random_seed)
68
+ else:
69
+ generator = torch.manual_seed(seed_generator)
70
+
71
+ output = pipe(
72
+ prompt=prompt,
73
+ image=image,
74
+ negative_prompt=negative_prompt,
75
+ num_images_per_prompt=num_images_per_prompt,
76
+ num_inference_steps=num_inference_step,
77
+ guidance_scale=guidance_scale,
78
+ generator=generator,
79
+ ).images
80
+
81
+ return output
82
+
83
+ def app():
84
+ with gr.Blocks():
85
+ with gr.Row():
86
+ with gr.Column():
87
+ controlnet_mlsd_image_file = gr.Image(
88
+ type="filepath", label="Image"
89
+ )
90
+
91
+ controlnet_mlsd_prompt = gr.Textbox(
92
+ lines=1,
93
+ show_label=False,
94
+ placeholder="Prompt",
95
+ )
96
+
97
+ controlnet_mlsd_negative_prompt = gr.Textbox(
98
+ lines=1,
99
+ show_label=False,
100
+ placeholder="Negative Prompt",
101
+ )
102
+
103
+ with gr.Row():
104
+ with gr.Column():
105
+ controlnet_mlsd_model_id = gr.Dropdown(
106
+ choices=stable_model_list,
107
+ value=stable_model_list[0],
108
+ label="Stable Model Id",
109
+ )
110
+ controlnet_mlsd_guidance_scale = gr.Slider(
111
+ minimum=0.1,
112
+ maximum=15,
113
+ step=0.1,
114
+ value=7.5,
115
+ label="Guidance Scale",
116
+ )
117
+ controlnet_mlsd_num_inference_step = gr.Slider(
118
+ minimum=1,
119
+ maximum=100,
120
+ step=1,
121
+ value=50,
122
+ label="Num Inference Step",
123
+ )
124
+
125
+ with gr.Row():
126
+ with gr.Column():
127
+ controlnet_mlsd_scheduler = gr.Dropdown(
128
+ choices=SCHEDULER_LIST,
129
+ value=SCHEDULER_LIST[0],
130
+ label="Scheduler",
131
+ )
132
+
133
+ controlnet_mlsd_seed_generator = gr.Slider(
134
+ minimum=0,
135
+ maximum=1000000,
136
+ step=1,
137
+ value=0,
138
+ label="Seed Generator",
139
+ )
140
+ controlnet_mlsd_num_images_per_prompt = (
141
+ gr.Slider(
142
+ minimum=1,
143
+ maximum=10,
144
+ step=1,
145
+ value=1,
146
+ label="Number Of Images",
147
+ )
148
+ )
149
+
150
+ controlnet_mlsd_predict = gr.Button(value="Generator")
151
+
152
+ with gr.Column():
153
+ output_image = gr.Gallery(
154
+ label="Generated images",
155
+ show_label=False,
156
+ elem_id="gallery",
157
+ ).style(grid=(1, 2))
158
+
159
+ controlnet_mlsd_predict.click(
160
+ fn=StableDiffusionControlNetMLSDGenerator().generate_image,
161
+ inputs=[
162
+ controlnet_mlsd_image_file,
163
+ controlnet_mlsd_model_id,
164
+ controlnet_mlsd_prompt,
165
+ controlnet_mlsd_negative_prompt,
166
+ controlnet_mlsd_num_images_per_prompt,
167
+ controlnet_mlsd_guidance_scale,
168
+ controlnet_mlsd_num_inference_step,
169
+ controlnet_mlsd_scheduler,
170
+ controlnet_mlsd_seed_generator,
171
+ ],
172
+ outputs=output_image,
173
+ )
diffusion_webui/diffusion_models/controlnet/controlnet_pose.py ADDED
@@ -0,0 +1,189 @@
1
+ import gradio as gr
2
+ import torch
3
+ from controlnet_aux import OpenposeDetector
4
+ from diffusers import (
5
+ ControlNetModel,
6
+ StableDiffusionControlNetPipeline,
7
+ UniPCMultistepScheduler,
8
+ )
9
+ from PIL import Image
10
+
11
+ from diffusion_webui.utils.model_list import (
12
+ controlnet_pose_model_list,
13
+ stable_model_list,
14
+ )
15
+ from diffusion_webui.utils.scheduler_list import (
16
+ SCHEDULER_LIST,
17
+ get_scheduler_list,
18
+ )
19
+
20
+
21
+ class StableDiffusionControlNetPoseGenerator:
22
+ def __init__(self):
23
+ self.pipe = None
24
+
25
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
26
+ if self.pipe is None:
27
+ controlnet = ControlNetModel.from_pretrained(
28
+ controlnet_model_path, torch_dtype=torch.float16
29
+ )
30
+
31
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
32
+ pretrained_model_name_or_path=stable_model_path,
33
+ controlnet=controlnet,
34
+ safety_checker=None,
35
+ torch_dtype=torch.float16,
36
+ )
37
+
38
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
39
+ self.pipe.to("cuda")
40
+ self.pipe.enable_xformers_memory_efficient_attention()
41
+
42
+ return self.pipe
43
+
44
+ def controlnet_pose(self, image_path: str):
45
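+        # OpenPose extracts a body-keypoint map from the photo; that map is the conditioning image for the pose ControlNet.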
+ openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
46
+
47
+ image = Image.open(image_path)
48
+ image = openpose(image)
49
+
50
+ return image
51
+
52
+ def generate_image(
53
+ self,
54
+ image_path: str,
55
+ stable_model_path: str,
56
+ controlnet_pose_model_path: str,
57
+ prompt: str,
58
+ negative_prompt: str,
59
+ num_images_per_prompt: int,
60
+ guidance_scale: int,
61
+ num_inference_step: int,
62
+ scheduler: str,
63
+ seed_generator: int,
64
+ ):
65
+
66
+ image = self.controlnet_pose(image_path=image_path)
67
+
68
+ pipe = self.load_model(
69
+ stable_model_path=stable_model_path,
70
+ controlnet_model_path=controlnet_pose_model_path,
71
+ scheduler=scheduler,
72
+ )
73
+
74
+ if seed_generator == 0:
75
+ random_seed = torch.randint(0, 1000000, (1,))
76
+ generator = torch.manual_seed(random_seed)
77
+ else:
78
+ generator = torch.manual_seed(seed_generator)
79
+
80
+ output = pipe(
81
+ prompt=prompt,
82
+ image=image,
83
+ negative_prompt=negative_prompt,
84
+ num_images_per_prompt=num_images_per_prompt,
85
+ num_inference_steps=num_inference_step,
86
+ guidance_scale=guidance_scale,
87
+ generator=generator,
88
+ ).images
89
+
90
+ return output
91
+
92
+ def app():
93
+ with gr.Blocks():
94
+ with gr.Row():
95
+ with gr.Column():
96
+ controlnet_pose_image_file = gr.Image(
97
+ type="filepath", label="Image"
98
+ )
99
+
100
+ controlnet_pose_prompt = gr.Textbox(
101
+ lines=1,
102
+ show_label=False,
103
+ placeholder="Prompt",
104
+ )
105
+
106
+ controlnet_pose_negative_prompt = gr.Textbox(
107
+ lines=1,
108
+ show_label=False,
109
+ placeholder="Negative Prompt",
110
+ )
111
+
112
+ with gr.Row():
113
+ with gr.Column():
114
+ controlnet_pose_stable_model_id = gr.Dropdown(
115
+ choices=stable_model_list,
116
+ value=stable_model_list[0],
117
+ label="Stable Model Id",
118
+ )
119
+ controlnet_pose_guidance_scale = gr.Slider(
120
+ minimum=0.1,
121
+ maximum=15,
122
+ step=0.1,
123
+ value=7.5,
124
+ label="Guidance Scale",
125
+ )
126
+
127
+ controlnet_pose_num_inference_step = gr.Slider(
128
+ minimum=1,
129
+ maximum=100,
130
+ step=1,
131
+ value=50,
132
+ label="Num Inference Step",
133
+ )
134
+
135
+ controlnet_pose_num_images_per_prompt = gr.Slider(
136
+ minimum=1,
137
+ maximum=10,
138
+ step=1,
139
+ value=1,
140
+ label="Number Of Images",
141
+ )
142
+
143
+ with gr.Row():
144
+ with gr.Column():
145
+ controlnet_pose_model_id = gr.Dropdown(
146
+ choices=controlnet_pose_model_list,
147
+ value=controlnet_pose_model_list[0],
148
+ label="ControlNet Model Id",
149
+ )
150
+
151
+ controlnet_pose_scheduler = gr.Dropdown(
152
+ choices=SCHEDULER_LIST,
153
+ value=SCHEDULER_LIST[0],
154
+ label="Scheduler",
155
+ )
156
+
157
+ controlnet_pose_seed_generator = gr.Slider(
158
+ minimum=0,
159
+ maximum=1000000,
160
+ step=1,
161
+ value=0,
162
+ label="Seed Generator",
163
+ )
164
+
165
+ controlnet_pose_predict = gr.Button(value="Generate")
166
+
167
+ with gr.Column():
168
+ output_image = gr.Gallery(
169
+ label="Generated images",
170
+ show_label=False,
171
+ elem_id="gallery",
172
+ ).style(grid=(1, 2))
173
+
174
+ controlnet_pose_predict.click(
175
+ fn=StableDiffusionControlNetPoseGenerator().generate_image,
176
+ inputs=[
177
+ controlnet_pose_image_file,
178
+ controlnet_pose_stable_model_id,
179
+ controlnet_pose_model_id,
180
+ controlnet_pose_prompt,
181
+ controlnet_pose_negative_prompt,
182
+ controlnet_pose_num_images_per_prompt,
183
+ controlnet_pose_guidance_scale,
184
+ controlnet_pose_num_inference_step,
185
+ controlnet_pose_scheduler,
186
+ controlnet_pose_seed_generator,
187
+ ],
188
+ outputs=output_image,
189
+ )
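Each generator exposes an app() that only builds its own Gradio block; the tab layout that combines them lives outside this commit. A rough sketch of mounting a single tab on its own:

import gradio as gr

from diffusion_webui.helpers import StableDiffusionControlNetPoseGenerator

with gr.Blocks() as demo:
    # app() creates the inputs, gallery and click handler inside the active Blocks context.
    StableDiffusionControlNetPoseGenerator.app()

demo.launch()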
diffusion_webui/diffusion_models/controlnet/controlnet_scribble.py ADDED
@@ -0,0 +1,188 @@
1
+ import gradio as gr
2
+ import torch
3
+ from controlnet_aux import HEDdetector
4
+ from diffusers import (
5
+ ControlNetModel,
6
+ StableDiffusionControlNetPipeline,
7
+ UniPCMultistepScheduler,
8
+ )
9
+ from PIL import Image
10
+
11
+ from diffusion_webui.utils.model_list import (
12
+ controlnet_scribble_model_list,
13
+ stable_model_list,
14
+ )
15
+ from diffusion_webui.utils.scheduler_list import (
16
+ SCHEDULER_LIST,
17
+ get_scheduler_list,
18
+ )
19
+
20
+
21
+ class StableDiffusionControlNetScribbleGenerator:
22
+ def __init__(self):
23
+ self.pipe = None
24
+
25
+ def load_model(self, stable_model_path, controlnet_model_path, scheduler):
26
+ if self.pipe is None:
27
+ controlnet = ControlNetModel.from_pretrained(
28
+ controlnet_model_path, torch_dtype=torch.float16
29
+ )
30
+
31
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
32
+ pretrained_model_name_or_path=stable_model_path,
33
+ controlnet=controlnet,
34
+ safety_checker=None,
35
+ torch_dtype=torch.float16,
36
+ )
37
+
38
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
39
+ self.pipe.to("cuda")
40
+ self.pipe.enable_xformers_memory_efficient_attention()
41
+
42
+ return self.pipe
43
+
44
+ def controlnet_scribble(self, image_path: str):
45
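+        # HED in scribble mode returns a coarse, sketch-like edge map rather than fine HED edges.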
+ hed = HEDdetector.from_pretrained("lllyasviel/ControlNet")
46
+
47
+ image = Image.open(image_path)
48
+ image = hed(image, scribble=True)
49
+
50
+ return image
51
+
52
+ def generate_image(
53
+ self,
54
+ image_path: str,
55
+ stable_model_path: str,
56
+ controlnet_hed_model_path: str,
57
+ prompt: str,
58
+ negative_prompt: str,
59
+ num_images_per_prompt: int,
60
+ guidance_scale: int,
61
+ num_inference_step: int,
62
+ scheduler: str,
63
+ seed_generator: int,
64
+ ):
65
+
66
+ image = self.controlnet_scribble(image_path=image_path)
67
+
68
+ pipe = self.load_model(
69
+ stable_model_path=stable_model_path,
70
+ controlnet_model_path=controlnet_hed_model_path,
71
+ scheduler=scheduler,
72
+ )
73
+ if seed_generator == 0:
74
+ random_seed = torch.randint(0, 1000000, (1,))
75
+ generator = torch.manual_seed(random_seed)
76
+ else:
77
+ generator = torch.manual_seed(seed_generator)
78
+
79
+ output = pipe(
80
+ prompt=prompt,
81
+ image=image,
82
+ negative_prompt=negative_prompt,
83
+ num_images_per_prompt=num_images_per_prompt,
84
+ num_inference_steps=num_inference_step,
85
+ guidance_scale=guidance_scale,
86
+ generator=generator,
87
+ ).images
88
+
89
+ return output
90
+
91
+ def app():
92
+ with gr.Blocks():
93
+ with gr.Row():
94
+ with gr.Column():
95
+ controlnet_scribble_image_file = gr.Image(
96
+ type="filepath", label="Image"
97
+ )
98
+ controlnet_scribble_prompt = gr.Textbox(
99
+ lines=1,
100
+ show_label=False,
101
+ placeholder="Prompt",
102
+ )
103
+
104
+ controlnet_scribble_negative_prompt = gr.Textbox(
105
+ lines=1,
106
+ show_label=False,
107
+ placeholder="Negative Prompt",
108
+ )
109
+
110
+ with gr.Row():
111
+ with gr.Column():
112
+ controlnet_scribble_stable_model_id = gr.Dropdown(
113
+ choices=stable_model_list,
114
+ value=stable_model_list[0],
115
+ label="Stable Model Id",
116
+ )
117
+ controlnet_scribble_guidance_scale = gr.Slider(
118
+ minimum=0.1,
119
+ maximum=15,
120
+ step=0.1,
121
+ value=7.5,
122
+ label="Guidance Scale",
123
+ )
124
+
125
+ controlnet_scribble_num_inference_step = gr.Slider(
126
+ minimum=1,
127
+ maximum=100,
128
+ step=1,
129
+ value=50,
130
+ label="Num Inference Step",
131
+ )
132
+
133
+ controlnet_scribble_num_images_per_prompt = (
134
+ gr.Slider(
135
+ minimum=1,
136
+ maximum=10,
137
+ step=1,
138
+ value=1,
139
+ label="Number Of Images",
140
+ )
141
+ )
142
+ with gr.Row():
143
+ with gr.Column():
144
+ controlnet_scribble_model_id = gr.Dropdown(
145
+ choices=controlnet_scribble_model_list,
146
+ value=controlnet_scribble_model_list[0],
147
+ label="ControlNet Model Id",
148
+ )
149
+
150
+ controlnet_scribble_scheduler = gr.Dropdown(
151
+ choices=SCHEDULER_LIST,
152
+ value=SCHEDULER_LIST[0],
153
+ label="Scheduler",
154
+ )
155
+
156
+ controlnet_scribble_seed_generator = gr.Slider(
157
+ minimum=0,
158
+ maximum=1000000,
159
+ step=1,
160
+ value=0,
161
+ label="Seed Generator",
162
+ )
163
+
164
+ controlnet_scribble_predict = gr.Button(value="Generate")
165
+
166
+ with gr.Column():
167
+ output_image = gr.Gallery(
168
+ label="Generated images",
169
+ show_label=False,
170
+ elem_id="gallery",
171
+ ).style(grid=(1, 2))
172
+
173
+ controlnet_scribble_predict.click(
174
+ fn=StableDiffusionControlNetScribbleGenerator().generate_image,
175
+ inputs=[
176
+ controlnet_scribble_image_file,
177
+ controlnet_scribble_stable_model_id,
178
+ controlnet_scribble_model_id,
179
+ controlnet_scribble_prompt,
180
+ controlnet_scribble_negative_prompt,
181
+ controlnet_scribble_num_images_per_prompt,
182
+ controlnet_scribble_guidance_scale,
183
+ controlnet_scribble_num_inference_step,
184
+ controlnet_scribble_scheduler,
185
+ controlnet_scribble_seed_generator,
186
+ ],
187
+ outputs=output_image,
188
+ )
diffusion_webui/diffusion_models/controlnet/controlnet_seg.py ADDED
@@ -0,0 +1,353 @@
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
5
+ from PIL import Image
6
+ from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
7
+
8
+ from diffusion_webui.utils.model_list import stable_model_list
9
+ from diffusion_webui.utils.scheduler_list import (
10
+ SCHEDULER_LIST,
11
+ get_scheduler_list,
12
+ )
13
+
14
+
15
+ def ade_palette():
16
+ """ADE20K palette that maps each class to RGB values."""
17
+ return [
18
+ [120, 120, 120],
19
+ [180, 120, 120],
20
+ [6, 230, 230],
21
+ [80, 50, 50],
22
+ [4, 200, 3],
23
+ [120, 120, 80],
24
+ [140, 140, 140],
25
+ [204, 5, 255],
26
+ [230, 230, 230],
27
+ [4, 250, 7],
28
+ [224, 5, 255],
29
+ [235, 255, 7],
30
+ [150, 5, 61],
31
+ [120, 120, 70],
32
+ [8, 255, 51],
33
+ [255, 6, 82],
34
+ [143, 255, 140],
35
+ [204, 255, 4],
36
+ [255, 51, 7],
37
+ [204, 70, 3],
38
+ [0, 102, 200],
39
+ [61, 230, 250],
40
+ [255, 6, 51],
41
+ [11, 102, 255],
42
+ [255, 7, 71],
43
+ [255, 9, 224],
44
+ [9, 7, 230],
45
+ [220, 220, 220],
46
+ [255, 9, 92],
47
+ [112, 9, 255],
48
+ [8, 255, 214],
49
+ [7, 255, 224],
50
+ [255, 184, 6],
51
+ [10, 255, 71],
52
+ [255, 41, 10],
53
+ [7, 255, 255],
54
+ [224, 255, 8],
55
+ [102, 8, 255],
56
+ [255, 61, 6],
57
+ [255, 194, 7],
58
+ [255, 122, 8],
59
+ [0, 255, 20],
60
+ [255, 8, 41],
61
+ [255, 5, 153],
62
+ [6, 51, 255],
63
+ [235, 12, 255],
64
+ [160, 150, 20],
65
+ [0, 163, 255],
66
+ [140, 140, 140],
67
+ [250, 10, 15],
68
+ [20, 255, 0],
69
+ [31, 255, 0],
70
+ [255, 31, 0],
71
+ [255, 224, 0],
72
+ [153, 255, 0],
73
+ [0, 0, 255],
74
+ [255, 71, 0],
75
+ [0, 235, 255],
76
+ [0, 173, 255],
77
+ [31, 0, 255],
78
+ [11, 200, 200],
79
+ [255, 82, 0],
80
+ [0, 255, 245],
81
+ [0, 61, 255],
82
+ [0, 255, 112],
83
+ [0, 255, 133],
84
+ [255, 0, 0],
85
+ [255, 163, 0],
86
+ [255, 102, 0],
87
+ [194, 255, 0],
88
+ [0, 143, 255],
89
+ [51, 255, 0],
90
+ [0, 82, 255],
91
+ [0, 255, 41],
92
+ [0, 255, 173],
93
+ [10, 0, 255],
94
+ [173, 255, 0],
95
+ [0, 255, 153],
96
+ [255, 92, 0],
97
+ [255, 0, 255],
98
+ [255, 0, 245],
99
+ [255, 0, 102],
100
+ [255, 173, 0],
101
+ [255, 0, 20],
102
+ [255, 184, 184],
103
+ [0, 31, 255],
104
+ [0, 255, 61],
105
+ [0, 71, 255],
106
+ [255, 0, 204],
107
+ [0, 255, 194],
108
+ [0, 255, 82],
109
+ [0, 10, 255],
110
+ [0, 112, 255],
111
+ [51, 0, 255],
112
+ [0, 194, 255],
113
+ [0, 122, 255],
114
+ [0, 255, 163],
115
+ [255, 153, 0],
116
+ [0, 255, 10],
117
+ [255, 112, 0],
118
+ [143, 255, 0],
119
+ [82, 0, 255],
120
+ [163, 255, 0],
121
+ [255, 235, 0],
122
+ [8, 184, 170],
123
+ [133, 0, 255],
124
+ [0, 255, 92],
125
+ [184, 0, 255],
126
+ [255, 0, 31],
127
+ [0, 184, 255],
128
+ [0, 214, 255],
129
+ [255, 0, 112],
130
+ [92, 255, 0],
131
+ [0, 224, 255],
132
+ [112, 224, 255],
133
+ [70, 184, 160],
134
+ [163, 0, 255],
135
+ [153, 0, 255],
136
+ [71, 255, 0],
137
+ [255, 0, 163],
138
+ [255, 204, 0],
139
+ [255, 0, 143],
140
+ [0, 255, 235],
141
+ [133, 255, 0],
142
+ [255, 0, 235],
143
+ [245, 0, 255],
144
+ [255, 0, 122],
145
+ [255, 245, 0],
146
+ [10, 190, 212],
147
+ [214, 255, 0],
148
+ [0, 204, 255],
149
+ [20, 0, 255],
150
+ [255, 255, 0],
151
+ [0, 153, 255],
152
+ [0, 41, 255],
153
+ [0, 255, 204],
154
+ [41, 0, 255],
155
+ [41, 255, 0],
156
+ [173, 0, 255],
157
+ [0, 245, 255],
158
+ [71, 0, 255],
159
+ [122, 0, 255],
160
+ [0, 255, 184],
161
+ [0, 92, 255],
162
+ [184, 255, 0],
163
+ [0, 133, 255],
164
+ [255, 214, 0],
165
+ [25, 194, 194],
166
+ [102, 255, 0],
167
+ [92, 0, 255],
168
+ ]
169
+
170
+
171
+ class StableDiffusionControlNetSegGenerator:
172
+ def __init__(self):
173
+ self.pipe = None
174
+
175
+ def load_model(
176
+ self,
177
+ stable_model_path,
178
+ scheduler,
179
+ ):
180
+
181
+ if self.pipe is None:
182
+ controlnet = ControlNetModel.from_pretrained(
183
+ "lllyasviel/sd-controlnet-seg", torch_dtype=torch.float16
184
+ )
185
+ self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
186
+ pretrained_model_name_or_path=stable_model_path,
187
+ controlnet=controlnet,
188
+ safety_checker=None,
189
+ torch_dtype=torch.float16,
190
+ )
191
+
192
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
193
+ self.pipe.to("cuda")
194
+ self.pipe.enable_xformers_memory_efficient_attention()
195
+
196
+ return self.pipe
197
+
198
+ def controlnet_seg(self, image_path: str):
199
+ image_processor = AutoImageProcessor.from_pretrained(
200
+ "openmmlab/upernet-convnext-small"
201
+ )
202
+ image_segmentor = UperNetForSemanticSegmentation.from_pretrained(
203
+ "openmmlab/upernet-convnext-small"
204
+ )
205
+
206
+ image = Image.open(image_path).convert("RGB")
207
+ pixel_values = image_processor(image, return_tensors="pt").pixel_values
208
+
209
+ with torch.no_grad():
210
+ outputs = image_segmentor(pixel_values)
211
+
212
+ seg = image_processor.post_process_semantic_segmentation(
213
+ outputs, target_sizes=[image.size[::-1]]
214
+ )[0]
215
+
216
+ color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
217
+ palette = np.array(ade_palette())
218
+
219
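+        # Paint every predicted ADE20K class id with its palette colour to build the conditioning image.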
+ for label, color in enumerate(palette):
220
+ color_seg[seg == label, :] = color
221
+
222
+ color_seg = color_seg.astype(np.uint8)
223
+ image = Image.fromarray(color_seg)
224
+
225
+ return image
226
+
227
+ def generate_image(
228
+ self,
229
+ image_path: str,
230
+ model_path: str,
231
+ prompt: str,
232
+ negative_prompt: str,
233
+ num_images_per_prompt: int,
234
+ guidance_scale: int,
235
+ num_inference_step: int,
236
+ scheduler: str,
237
+ seed_generator: int,
238
+ ):
239
+
240
+ image = self.controlnet_seg(image_path=image_path)
241
+ pipe = self.load_model(
242
+ stable_model_path=model_path,
243
+ scheduler=scheduler,
244
+ )
245
+ if seed_generator == 0:
246
+ random_seed = torch.randint(0, 1000000, (1,))
247
+ generator = torch.manual_seed(random_seed)
248
+ else:
249
+ generator = torch.manual_seed(seed_generator)
250
+
251
+ output = pipe(
252
+ prompt=prompt,
253
+ image=image,
254
+ negative_prompt=negative_prompt,
255
+ num_images_per_prompt=num_images_per_prompt,
256
+ num_inference_steps=num_inference_step,
257
+ guidance_scale=guidance_scale,
258
+ generator=generator,
259
+ ).images
260
+
261
+ return output
262
+
263
+ def app():
264
+ with gr.Blocks():
265
+ with gr.Row():
266
+ with gr.Column():
267
+ controlnet_seg_image_file = gr.Image(
268
+ type="filepath", label="Image"
269
+ )
270
+
271
+ controlnet_seg_prompt = gr.Textbox(
272
+ lines=1,
273
+ show_label=False,
274
+ placeholder="Prompt",
275
+ )
276
+
277
+ controlnet_seg_negative_prompt = gr.Textbox(
278
+ lines=1,
279
+ show_label=False,
280
+ placeholder="Negative Prompt",
281
+ )
282
+
283
+ with gr.Row():
284
+ with gr.Column():
285
+ controlnet_seg_model_id = gr.Dropdown(
286
+ choices=stable_model_list,
287
+ value=stable_model_list[0],
288
+ label="Stable Model Id",
289
+ )
290
+ controlnet_seg_guidance_scale = gr.Slider(
291
+ minimum=0.1,
292
+ maximum=15,
293
+ step=0.1,
294
+ value=7.5,
295
+ label="Guidance Scale",
296
+ )
297
+
298
+ controlnet_seg_num_inference_step = gr.Slider(
299
+ minimum=1,
300
+ maximum=100,
301
+ step=1,
302
+ value=50,
303
+ label="Num Inference Step",
304
+ )
305
+
306
+ with gr.Row():
307
+ with gr.Column():
308
+ controlnet_seg_scheduler = gr.Dropdown(
309
+ choices=SCHEDULER_LIST,
310
+ value=SCHEDULER_LIST[0],
311
+ label="Scheduler",
312
+ )
313
+ controlnet_seg_num_images_per_prompt = (
314
+ gr.Slider(
315
+ minimum=1,
316
+ maximum=10,
317
+ step=1,
318
+ value=1,
319
+ label="Number Of Images",
320
+ )
321
+ )
322
+ controlnet_seg_seed_generator = gr.Slider(
323
+ minimum=0,
324
+ maximum=1000000,
325
+ step=1,
326
+ value=0,
327
+ label="Seed Generator",
328
+ )
329
+
330
+ controlnet_seg_predict = gr.Button(value="Generate")
331
+
332
+ with gr.Column():
333
+ output_image = gr.Gallery(
334
+ label="Generated images",
335
+ show_label=False,
336
+ elem_id="gallery",
337
+ ).style(grid=(1, 2))
338
+
339
+ controlnet_seg_predict.click(
340
+ fn=StableDiffusionControlNetSegGenerator().generate_image,
341
+ inputs=[
342
+ controlnet_seg_image_file,
343
+ controlnet_seg_model_id,
344
+ controlnet_seg_prompt,
345
+ controlnet_seg_negative_prompt,
346
+ controlnet_seg_num_images_per_prompt,
347
+ controlnet_seg_guidance_scale,
348
+ controlnet_seg_num_inference_step,
349
+ controlnet_seg_scheduler,
350
+ controlnet_seg_seed_generator,
351
+ ],
352
+ outputs=[output_image],
353
+ )
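The segmentation preprocessing can be inspected on its own before running the full pipeline; a small sketch with an illustrative file name:

from diffusion_webui.diffusion_models.controlnet.controlnet_seg import (
    StableDiffusionControlNetSegGenerator,
)

# Returns the colourised ADE20K segmentation map that is passed to the seg ControlNet.
seg_map = StableDiffusionControlNetSegGenerator().controlnet_seg(image_path="bedroom.png")
seg_map.save("bedroom_seg.png")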
diffusion_webui/diffusion_models/stable_diffusion/__init__.py ADDED
File without changes
diffusion_webui/diffusion_models/stable_diffusion/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (215 Bytes). View file
 
diffusion_webui/diffusion_models/stable_diffusion/__pycache__/img2img_app.cpython-38.pyc ADDED
Binary file (3.48 kB). View file
 
diffusion_webui/diffusion_models/stable_diffusion/__pycache__/inpaint_app.cpython-38.pyc ADDED
Binary file (3.44 kB). View file
 
diffusion_webui/diffusion_models/stable_diffusion/__pycache__/text2img_app.cpython-38.pyc ADDED
Binary file (3.52 kB). View file
 
diffusion_webui/diffusion_models/stable_diffusion/img2img_app.py ADDED
@@ -0,0 +1,153 @@
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import StableDiffusionImg2ImgPipeline
4
+ from PIL import Image
5
+
6
+ from diffusion_webui.utils.model_list import stable_model_list
7
+ from diffusion_webui.utils.scheduler_list import (
8
+ SCHEDULER_LIST,
9
+ get_scheduler_list,
10
+ )
11
+
12
+
13
+ class StableDiffusionImage2ImageGenerator:
14
+ def __init__(self):
15
+ self.pipe = None
16
+
17
+ def load_model(self, model_path, scheduler):
18
+ if self.pipe is None:
19
+ self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
20
+ model_path, safety_checker=None, torch_dtype=torch.float16
21
+ )
22
+
23
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
24
+ self.pipe.to("cuda")
25
+ self.pipe.enable_xformers_memory_efficient_attention()
26
+
27
+ return self.pipe
28
+
29
+ def generate_image(
30
+ self,
31
+ image_path: str,
32
+ model_path: str,
33
+ prompt: str,
34
+ negative_prompt: str,
35
+ num_images_per_prompt: int,
36
+ scheduler: str,
37
+ guidance_scale: int,
38
+ num_inference_step: int,
39
+ seed_generator=0,
40
+ ):
41
+ pipe = self.load_model(
42
+ model_path=model_path,
43
+ scheduler=scheduler,
44
+ )
45
+
46
+ if seed_generator == 0:
47
+ random_seed = torch.randint(0, 1000000, (1,))
48
+ generator = torch.manual_seed(random_seed)
49
+ else:
50
+ generator = torch.manual_seed(seed_generator)
51
+
52
+ image = Image.open(image_path)
53
+ images = pipe(
54
+ prompt,
55
+ image=image,
56
+ negative_prompt=negative_prompt,
57
+ num_images_per_prompt=num_images_per_prompt,
58
+ num_inference_steps=num_inference_step,
59
+ guidance_scale=guidance_scale,
60
+ generator=generator,
61
+ ).images
62
+
63
+ return images
64
+
65
+ def app():
66
+ with gr.Blocks():
67
+ with gr.Row():
68
+ with gr.Column():
69
+ image2image_image_file = gr.Image(
70
+ type="filepath", label="Image"
71
+ ).style(height=260)
72
+
73
+ image2image_prompt = gr.Textbox(
74
+ lines=1,
75
+ placeholder="Prompt",
76
+ show_label=False,
77
+ )
78
+
79
+ image2image_negative_prompt = gr.Textbox(
80
+ lines=1,
81
+ placeholder="Negative Prompt",
82
+ show_label=False,
83
+ )
84
+
85
+ with gr.Row():
86
+ with gr.Column():
87
+ image2image_model_path = gr.Dropdown(
88
+ choices=stable_model_list,
89
+ value=stable_model_list[0],
90
+ label="Stable Model Id",
91
+ )
92
+
93
+ image2image_guidance_scale = gr.Slider(
94
+ minimum=0.1,
95
+ maximum=15,
96
+ step=0.1,
97
+ value=7.5,
98
+ label="Guidance Scale",
99
+ )
100
+ image2image_num_inference_step = gr.Slider(
101
+ minimum=1,
102
+ maximum=100,
103
+ step=1,
104
+ value=50,
105
+ label="Num Inference Step",
106
+ )
107
+ with gr.Row():
108
+ with gr.Column():
109
+ image2image_scheduler = gr.Dropdown(
110
+ choices=SCHEDULER_LIST,
111
+ value=SCHEDULER_LIST[0],
112
+ label="Scheduler",
113
+ )
114
+ image2image_num_images_per_prompt = gr.Slider(
115
+ minimum=1,
116
+ maximum=30,
117
+ step=1,
118
+ value=1,
119
+ label="Number Of Images",
120
+ )
121
+
122
+ image2image_seed_generator = gr.Slider(
123
+ minimum=0,
124
+ maximum=1000000,
125
+ step=1,
126
+ value=0,
127
+ label="Seed(0 for random)",
128
+ )
129
+
130
+ image2image_predict_button = gr.Button(value="Generate")
131
+
132
+ with gr.Column():
133
+ output_image = gr.Gallery(
134
+ label="Generated images",
135
+ show_label=False,
136
+ elem_id="gallery",
137
+ ).style(grid=(1, 2))
138
+
139
+ image2image_predict_button.click(
140
+ fn=StableDiffusionImage2ImageGenerator().generate_image,
141
+ inputs=[
142
+ image2image_image_file,
143
+ image2image_model_path,
144
+ image2image_prompt,
145
+ image2image_negative_prompt,
146
+ image2image_num_images_per_prompt,
147
+ image2image_scheduler,
148
+ image2image_guidance_scale,
149
+ image2image_num_inference_step,
150
+ image2image_seed_generator,
151
+ ],
152
+ outputs=[output_image],
153
+ )
diffusion_webui/diffusion_models/stable_diffusion/inpaint_app.py ADDED
@@ -0,0 +1,148 @@
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import DiffusionPipeline
4
+
5
+ from diffusion_webui.utils.model_list import stable_inpiant_model_list
6
+
7
+
8
+ class StableDiffusionInpaintGenerator:
9
+ def __init__(self):
10
+ self.pipe = None
11
+
12
+ def load_model(self, model_path):
13
+ if self.pipe is None:
14
+ self.pipe = DiffusionPipeline.from_pretrained(
15
+ model_path, revision="fp16", torch_dtype=torch.float16
16
+ )
17
+
18
+ self.pipe.to("cuda")
19
+ self.pipe.enable_xformers_memory_efficient_attention()
20
+
21
+ return self.pipe
22
+
23
+ def generate_image(
24
+ self,
25
+ pil_image: str,
26
+ model_path: str,
27
+ prompt: str,
28
+ negative_prompt: str,
29
+ num_images_per_prompt: int,
30
+ guidance_scale: int,
31
+ num_inference_step: int,
32
+ seed_generator=0,
33
+ ):
34
+ image = pil_image["image"].convert("RGB").resize((512, 512))
35
+ mask_image = pil_image["mask"].convert("RGB").resize((512, 512))
36
+ pipe = self.load_model(model_path)
37
+
38
+ if seed_generator == 0:
39
+ random_seed = torch.randint(0, 1000000, (1,))
40
+ generator = torch.manual_seed(random_seed)
41
+ else:
42
+ generator = torch.manual_seed(seed_generator)
43
+
44
+ output = pipe(
45
+ prompt=prompt,
46
+ image=image,
47
+ mask_image=mask_image,
48
+ negative_prompt=negative_prompt,
49
+ num_images_per_prompt=num_images_per_prompt,
50
+ num_inference_steps=num_inference_step,
51
+ guidance_scale=guidance_scale,
52
+ generator=generator,
53
+ ).images
54
+
55
+ return output
56
+
57
+ def app():
58
+ with gr.Blocks():
59
+ with gr.Row():
60
+ with gr.Column():
61
+ stable_diffusion_inpaint_image_file = gr.Image(
62
+ source="upload",
63
+ tool="sketch",
64
+ elem_id="image_upload",
65
+ type="pil",
66
+ label="Upload",
67
+ ).style(height=260)
68
+
69
+ stable_diffusion_inpaint_prompt = gr.Textbox(
70
+ lines=1,
71
+ placeholder="Prompt",
72
+ show_label=False,
73
+ )
74
+
75
+ stable_diffusion_inpaint_negative_prompt = gr.Textbox(
76
+ lines=1,
77
+ placeholder="Negative Prompt",
78
+ show_label=False,
79
+ )
80
+ stable_diffusion_inpaint_model_id = gr.Dropdown(
81
+ choices=stable_inpiant_model_list,
82
+ value=stable_inpiant_model_list[0],
83
+ label="Inpaint Model Id",
84
+ )
85
+ with gr.Row():
86
+ with gr.Column():
87
+ stable_diffusion_inpaint_guidance_scale = gr.Slider(
88
+ minimum=0.1,
89
+ maximum=15,
90
+ step=0.1,
91
+ value=7.5,
92
+ label="Guidance Scale",
93
+ )
94
+
95
+ stable_diffusion_inpaint_num_inference_step = (
96
+ gr.Slider(
97
+ minimum=1,
98
+ maximum=100,
99
+ step=1,
100
+ value=50,
101
+ label="Num Inference Step",
102
+ )
103
+ )
104
+
105
+ with gr.Row():
106
+ with gr.Column():
107
+ stable_diffusion_inpiant_num_images_per_prompt = gr.Slider(
108
+ minimum=1,
109
+ maximum=10,
110
+ step=1,
111
+ value=1,
112
+ label="Number Of Images",
113
+ )
114
+ stable_diffusion_inpaint_seed_generator = (
115
+ gr.Slider(
116
+ minimum=0,
117
+ maximum=1000000,
118
+ step=1,
119
+ value=0,
120
+ label="Seed(0 for random)",
121
+ )
122
+ )
123
+
124
+ stable_diffusion_inpaint_predict = gr.Button(
125
+ value="Generator"
126
+ )
127
+
128
+ with gr.Column():
129
+ output_image = gr.Gallery(
130
+ label="Generated images",
131
+ show_label=False,
132
+ elem_id="gallery",
133
+ ).style(grid=(1, 2))
134
+
135
+ stable_diffusion_inpaint_predict.click(
136
+ fn=StableDiffusionInpaintGenerator().generate_image,
137
+ inputs=[
138
+ stable_diffusion_inpaint_image_file,
139
+ stable_diffusion_inpaint_model_id,
140
+ stable_diffusion_inpaint_prompt,
141
+ stable_diffusion_inpaint_negative_prompt,
142
+ stable_diffusion_inpiant_num_images_per_prompt,
143
+ stable_diffusion_inpaint_guidance_scale,
144
+ stable_diffusion_inpaint_num_inference_step,
145
+ stable_diffusion_inpaint_seed_generator,
146
+ ],
147
+ outputs=[output_image],
148
+ )
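When called outside the sketch tool, generate_image expects a dict holding the source image and the mask under the same keys the Gradio component produces. A hedged sketch with illustrative file names (CUDA assumed):

from PIL import Image

from diffusion_webui.diffusion_models.stable_diffusion.inpaint_app import (
    StableDiffusionInpaintGenerator,
)

pil_image = {
    "image": Image.open("photo.png"),  # picture to edit
    "mask": Image.open("mask.png"),    # white pixels mark the region to repaint
}
outputs = StableDiffusionInpaintGenerator().generate_image(
    pil_image=pil_image,
    model_path="stabilityai/stable-diffusion-2-inpainting",
    prompt="a red velvet sofa",
    negative_prompt="blurry, deformed",
    num_images_per_prompt=1,
    guidance_scale=7.5,
    num_inference_step=30,
)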
diffusion_webui/diffusion_models/stable_diffusion/text2img_app.py ADDED
@@ -0,0 +1,170 @@
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import StableDiffusionPipeline
4
+
5
+ from diffusion_webui.utils.model_list import stable_model_list
6
+ from diffusion_webui.utils.scheduler_list import SCHEDULER_LIST, get_scheduler_list
7
+
8
+
9
+ class StableDiffusionText2ImageGenerator:
10
+ def __init__(self):
11
+ self.pipe = None
12
+
13
+ def load_model(
14
+ self,
15
+ model_path,
16
+ scheduler,
17
+ ):
18
+ if self.pipe is None:
19
+ self.pipe = StableDiffusionPipeline.from_pretrained(
20
+ model_path, safety_checker=None, torch_dtype=torch.float16
21
+ )
22
+
23
+ self.pipe = get_scheduler_list(pipe=self.pipe, scheduler=scheduler)
24
+ self.pipe.to("cuda")
25
+ self.pipe.enable_xformers_memory_efficient_attention()
26
+
27
+ return self.pipe
28
+
29
+ def generate_image(
30
+ self,
31
+ model_path: str,
32
+ prompt: str,
33
+ negative_prompt: str,
34
+ num_images_per_prompt: int,
35
+ scheduler: str,
36
+ guidance_scale: int,
37
+ num_inference_step: int,
38
+ height: int,
39
+ width: int,
40
+ seed_generator=0,
41
+ ):
42
+ pipe = self.load_model(
43
+ model_path=model_path,
44
+ scheduler=scheduler,
45
+ )
46
+ if seed_generator == 0:
47
+ random_seed = torch.randint(0, 1000000, (1,))
48
+ generator = torch.manual_seed(random_seed)
49
+ else:
50
+ generator = torch.manual_seed(seed_generator)
51
+
52
+ images = pipe(
53
+ prompt=prompt,
54
+ height=height,
55
+ width=width,
56
+ negative_prompt=negative_prompt,
57
+ num_images_per_prompt=num_images_per_prompt,
58
+ num_inference_steps=num_inference_step,
59
+ guidance_scale=guidance_scale,
60
+ generator=generator,
61
+ ).images
62
+
63
+ return images
64
+
65
+ def app():
66
+ with gr.Blocks():
67
+ with gr.Row():
68
+ with gr.Column():
69
+ text2image_prompt = gr.Textbox(
70
+ lines=1,
71
+ placeholder="Prompt",
72
+ show_label=False,
73
+ )
74
+
75
+ text2image_negative_prompt = gr.Textbox(
76
+ lines=1,
77
+ placeholder="Negative Prompt",
78
+ show_label=False,
79
+ )
80
+ with gr.Row():
81
+ with gr.Column():
82
+ text2image_model_path = gr.Dropdown(
83
+ choices=stable_model_list,
84
+ value=stable_model_list[0],
85
+ label="Text-Image Model Id",
86
+ )
87
+
88
+ text2image_guidance_scale = gr.Slider(
89
+ minimum=0.1,
90
+ maximum=15,
91
+ step=0.1,
92
+ value=7.5,
93
+ label="Guidance Scale",
94
+ )
95
+
96
+ text2image_num_inference_step = gr.Slider(
97
+ minimum=1,
98
+ maximum=100,
99
+ step=1,
100
+ value=50,
101
+ label="Num Inference Step",
102
+ )
103
+ text2image_num_images_per_prompt = gr.Slider(
104
+ minimum=1,
105
+ maximum=30,
106
+ step=1,
107
+ value=1,
108
+ label="Number Of Images",
109
+ )
110
+ with gr.Row():
111
+ with gr.Column():
112
+
113
+ text2image_scheduler = gr.Dropdown(
114
+ choices=SCHEDULER_LIST,
+ value=SCHEDULER_LIST[0],
122
+ label="Scheduler",
123
+ )
124
+
125
+ text2image_height = gr.Slider(
126
+ minimum=128,
127
+ maximum=1280,
128
+ step=32,
129
+ value=512,
130
+ label="Image Height",
131
+ )
132
+
133
+ text2image_width = gr.Slider(
134
+ minimum=128,
135
+ maximum=1280,
136
+ step=32,
137
+ value=512,
138
+ label="Image Width",
139
+ )
140
+ text2image_seed_generator = gr.Slider(
141
+ label="Seed(0 for random)",
142
+ minimum=0,
143
+ maximum=1000000,
144
+ value=0,
145
+ )
146
+ text2image_predict = gr.Button(value="Generate")
147
+
148
+ with gr.Column():
149
+ output_image = gr.Gallery(
150
+ label="Generated images",
151
+ show_label=False,
152
+ elem_id="gallery",
153
+ ).style(grid=(1, 2), height=200)
154
+
155
+ text2image_predict.click(
156
+ fn=StableDiffusionText2ImageGenerator().generate_image,
157
+ inputs=[
158
+ text2image_model_path,
159
+ text2image_prompt,
160
+ text2image_negative_prompt,
161
+ text2image_num_images_per_prompt,
162
+ text2image_scheduler,
163
+ text2image_guidance_scale,
164
+ text2image_num_inference_step,
165
+ text2image_height,
166
+ text2image_width,
167
+ text2image_seed_generator,
168
+ ],
169
+ outputs=output_image,
170
+ )
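Direct use of the text-to-image generator follows the same pattern; a sketch with an illustrative prompt (CUDA and xformers assumed):

from diffusion_webui.diffusion_models.stable_diffusion.text2img_app import (
    StableDiffusionText2ImageGenerator,
)

images = StableDiffusionText2ImageGenerator().generate_image(
    model_path="stabilityai/stable-diffusion-2-1",
    prompt="a watercolor painting of a lighthouse at dawn",
    negative_prompt="low quality",
    num_images_per_prompt=2,
    scheduler="EulerA",
    guidance_scale=7.5,
    num_inference_step=30,
    height=768,
    width=768,
    seed_generator=42,
)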
diffusion_webui/helpers.py ADDED
@@ -0,0 +1,33 @@
1
+ from diffusion_webui.diffusion_models.controlnet.controlnet_canny import (
2
+ StableDiffusionControlNetCannyGenerator,
3
+ )
4
+ from diffusion_webui.diffusion_models.controlnet.controlnet_depth import (
5
+ StableDiffusionControlNetDepthGenerator,
6
+ )
7
+ from diffusion_webui.diffusion_models.controlnet.controlnet_hed import (
8
+ StableDiffusionControlNetHEDGenerator,
9
+ )
10
+ from diffusion_webui.diffusion_models.controlnet.controlnet_inpaint.controlnet_inpaint_app import (
11
+ StableDiffusionControlInpaintNetCannyGenerator,
12
+ )
13
+ from diffusion_webui.diffusion_models.controlnet.controlnet_mlsd import (
14
+ StableDiffusionControlNetMLSDGenerator,
15
+ )
16
+ from diffusion_webui.diffusion_models.controlnet.controlnet_pose import (
17
+ StableDiffusionControlNetPoseGenerator,
18
+ )
19
+ from diffusion_webui.diffusion_models.controlnet.controlnet_scribble import (
20
+ StableDiffusionControlNetScribbleGenerator,
21
+ )
22
+ from diffusion_webui.diffusion_models.controlnet.controlnet_seg import (
23
+ StableDiffusionControlNetSegGenerator,
24
+ )
25
+ from diffusion_webui.diffusion_models.stable_diffusion.img2img_app import (
26
+ StableDiffusionImage2ImageGenerator,
27
+ )
28
+ from diffusion_webui.diffusion_models.stable_diffusion.inpaint_app import (
29
+ StableDiffusionInpaintGenerator,
30
+ )
31
+ from diffusion_webui.diffusion_models.stable_diffusion.text2img_app import (
32
+ StableDiffusionText2ImageGenerator,
33
+ )
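These re-exports let UI code pull every generator from one module, for example:

from diffusion_webui.helpers import (
    StableDiffusionControlNetCannyGenerator,
    StableDiffusionText2ImageGenerator,
)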
diffusion_webui/utils/__init__.py ADDED
File without changes
diffusion_webui/utils/model_list.py ADDED
@@ -0,0 +1,33 @@
1
+ stable_model_list = [
2
+ "runwayml/stable-diffusion-v1-5",
3
+ "stabilityai/stable-diffusion-2-1",
4
+ ]
5
+
6
+ controlnet_canny_model_list = [
7
+ "lllyasviel/sd-controlnet-canny",
8
+ "thibaud/controlnet-sd21-canny-diffusers",
9
+ ]
10
+
11
+ controlnet_depth_model_list = [
12
+ "lllyasviel/sd-controlnet-depth",
13
+ "thibaud/controlnet-sd21-depth-diffusers",
14
+ ]
15
+
16
+ controlnet_pose_model_list = [
17
+ "lllyasviel/sd-controlnet-openpose",
18
+ "thibaud/controlnet-sd21-openpose-diffusers",
19
+ ]
20
+
21
+ controlnet_hed_model_list = [
22
+ "lllyasviel/sd-controlnet-hed",
23
+ "thibaud/controlnet-sd21-hed-diffusers",
24
+ ]
25
+
26
+ controlnet_scribble_model_list = [
27
+ "lllyasviel/sd-controlnet-scribble",
28
+ "thibaud/controlnet-sd21-scribble-diffusers",
29
+ ]
30
+ stable_inpiant_model_list = [
31
+ "stabilityai/stable-diffusion-2-inpainting",
32
+ "runwayml/stable-diffusion-inpainting",
33
+ ]
diffusion_webui/utils/scheduler_list.py ADDED
@@ -0,0 +1,47 @@
1
+ from diffusers import (
2
+ DDIMScheduler,
3
+ EulerAncestralDiscreteScheduler,
4
+ EulerDiscreteScheduler,
5
+ HeunDiscreteScheduler,
6
+ LMSDiscreteScheduler,
7
+ UniPCMultistepScheduler,
8
+ )
9
+
10
+ SCHEDULER_LIST = [
11
+ "DDIM",
12
+ "EulerA",
13
+ "Euler",
14
+ "LMS",
15
+ "Heun",
16
+ "UniPC",
17
+ ]
18
+
19
+
20
+ def get_scheduler_list(pipe, scheduler):
21
+ if scheduler == SCHEDULER_LIST[0]:
22
+ pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
23
+
24
+ elif scheduler == SCHEDULER_LIST[1]:
25
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
26
+ pipe.scheduler.config
27
+ )
28
+
29
+ elif scheduler == SCHEDULER_LIST[2]:
30
+ pipe.scheduler = EulerDiscreteScheduler.from_config(
31
+ pipe.scheduler.config
32
+ )
33
+
34
+ elif scheduler == SCHEDULER_LIST[3]:
35
+ pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
36
+
37
+ elif scheduler == SCHEDULER_LIST[4]:
38
+ pipe.scheduler = HeunDiscreteScheduler.from_config(
39
+ pipe.scheduler.config
40
+ )
41
+
42
+ elif scheduler == SCHEDULER_LIST[5]:
43
+ pipe.scheduler = UniPCMultistepScheduler.from_config(
44
+ pipe.scheduler.config
45
+ )
46
+
47
+ return pipe
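get_scheduler_list swaps the scheduler of any diffusers pipeline in place by name; a minimal sketch:

from diffusers import StableDiffusionPipeline

from diffusion_webui.utils.scheduler_list import get_scheduler_list

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = get_scheduler_list(pipe=pipe, scheduler="UniPC")  # any name from SCHEDULER_LIST

Note that an unrecognised name simply leaves the pipeline's default scheduler untouched, since there is no else branch.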