Spaces:

hysts
/

ControlNet

Running

App Files Community

limit max number of images

by radames - opened Feb 15, 2023

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+1024

-1329

Files changed (19) hide show

.pre-commit-config.yaml +0 -10
LICENSE +0 -21
README.md +2 -4
app.py +44 -123
app_canny.py → gradio_canny2image.py +35 -52
app_depth.py → gradio_depth2image.py +25 -43
app_fake_scribble.py → gradio_fake_scribble2image.py +25 -40
app_hed.py → gradio_hed2image.py +25 -40
app_hough.py → gradio_hough2image.py +28 -44
app_normal.py → gradio_normal2image.py +26 -44
app_pose.py → gradio_pose2image.py +25 -46
app_scribble.py → gradio_scribble2image.py +24 -38
app_scribble_interactive.py → gradio_scribble2image_interactive.py +26 -41
app_seg.py → gradio_seg2image.py +25 -43
model.py +674 -598
notebooks/notebook.ipynb +0 -80
patch +0 -13
requirements.txt +1 -5
style.css +0 -5

.pre-commit-config.yaml CHANGED Viewed

@@ -35,13 +35,3 @@ repos:
   hooks:
   - id: yapf
     args: ['--parallel', '--in-place']
-- repo: https://github.com/kynan/nbstripout
-  rev: 0.6.0
-  hooks:
-    - id: nbstripout
-      args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
-- repo: https://github.com/nbQA-dev/nbQA
-  rev: 1.6.4
-  hooks:
-    - id: nbqa-isort
-    - id: nbqa-yapf

   hooks:
   - id: yapf
     args: ['--parallel', '--in-place']

LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2023 hysts
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

README.md CHANGED Viewed

@@ -4,12 +4,10 @@ emoji: 🌖
 colorFrom: pink
 colorTo: blue
 sdk: gradio
-sdk_version: 3.36.1
-python_version: 3.10.11
 app_file: app.py
 pinned: false
-license: mit
-suggested_hardware: t4-medium
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorFrom: pink
 colorTo: blue
 sdk: gradio
+sdk_version: 3.18.0
+python_version: 3.10.9
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -3,155 +3,76 @@
 from __future__ import annotations
 import os
-import pathlib
 import shlex
 import subprocess
 import gradio as gr
-import torch
 if os.getenv('SYSTEM') == 'spaces':
     with open('patch') as f:
         subprocess.run(shlex.split('patch -p1'), stdin=f, cwd='ControlNet')
-base_url = 'https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/'
-names = [
-    'body_pose_model.pth',
-    'dpt_hybrid-midas-501f0c75.pt',
-    'hand_pose_model.pth',
-    'mlsd_large_512_fp32.pth',
-    'mlsd_tiny_512_fp32.pth',
-    'network-bsds500.pth',
-    'upernet_global_small.pth',
-]
-for name in names:
-    command = f'wget https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/{name} -O {name}'
-    out_path = pathlib.Path(f'ControlNet/annotator/ckpts/{name}')
-    if out_path.exists():
-        continue
-    subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
-from app_canny import create_demo as create_demo_canny
-from app_depth import create_demo as create_demo_depth
-from app_fake_scribble import create_demo as create_demo_fake_scribble
-from app_hed import create_demo as create_demo_hed
-from app_hough import create_demo as create_demo_hough
-from app_normal import create_demo as create_demo_normal
-from app_pose import create_demo as create_demo_pose
-from app_scribble import create_demo as create_demo_scribble
-from app_scribble_interactive import \
     create_demo as create_demo_scribble_interactive
-from app_seg import create_demo as create_demo_seg
-from model import Model, download_all_controlnet_weights
-DESCRIPTION = '''# [ControlNet v1.0](https://github.com/lllyasviel/ControlNet)
-<p class="note">New ControlNet v1.1 is available <a href="https://huggingface.co/spaces/hysts/ControlNet-v1-1">here</a>.</p>
 '''
-SPACE_ID = os.getenv('SPACE_ID')
-ALLOW_CHANGING_BASE_MODEL = SPACE_ID != 'hysts/ControlNet'
-if SPACE_ID is not None:
-    DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
-if not torch.cuda.is_available():
-    DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'
-if torch.cuda.is_available():
-    if os.getenv('SYSTEM') == 'spaces':
-        download_all_controlnet_weights()
-MAX_IMAGES = int(os.getenv('MAX_IMAGES', '3'))
-DEFAULT_NUM_IMAGES = min(MAX_IMAGES, int(os.getenv('DEFAULT_NUM_IMAGES', '1')))
-DEFAULT_MODEL_ID = os.getenv('DEFAULT_MODEL_ID',
-                             'runwayml/stable-diffusion-v1-5')
-model = Model(base_model_id=DEFAULT_MODEL_ID, task_name='canny')
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tabs():
         with gr.TabItem('Canny'):
-            create_demo_canny(model.process_canny,
-                              max_images=MAX_IMAGES,
-                              default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Hough'):
-            create_demo_hough(model.process_hough,
-                              max_images=MAX_IMAGES,
-                              default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('HED'):
-            create_demo_hed(model.process_hed,
-                            max_images=MAX_IMAGES,
-                            default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Scribble'):
-            create_demo_scribble(model.process_scribble,
-                                 max_images=MAX_IMAGES,
-                                 default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Scribble Interactive'):
             create_demo_scribble_interactive(
-                model.process_scribble_interactive,
-                max_images=MAX_IMAGES,
-                default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Fake Scribble'):
-            create_demo_fake_scribble(model.process_fake_scribble,
-                                      max_images=MAX_IMAGES,
-                                      default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Pose'):
-            create_demo_pose(model.process_pose,
-                             max_images=MAX_IMAGES,
-                             default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Segmentation'):
-            create_demo_seg(model.process_seg,
-                            max_images=MAX_IMAGES,
-                            default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Depth'):
-            create_demo_depth(model.process_depth,
-                              max_images=MAX_IMAGES,
-                              default_num_images=DEFAULT_NUM_IMAGES)
         with gr.TabItem('Normal map'):
-            create_demo_normal(model.process_normal,
-                               max_images=MAX_IMAGES,
-                               default_num_images=DEFAULT_NUM_IMAGES)
-    with gr.Accordion(label='Base model', open=False):
-        with gr.Row():
-            with gr.Column():
-                current_base_model = gr.Text(label='Current base model')
-            with gr.Column(scale=0.3):
-                check_base_model_button = gr.Button('Check current base model')
-        with gr.Row():
-            with gr.Column():
-                new_base_model_id = gr.Text(
-                    label='New base model',
-                    max_lines=1,
-                    placeholder='runwayml/stable-diffusion-v1-5',
-                    info=
-                    'The base model must be compatible with Stable Diffusion v1.5.',
-                    interactive=ALLOW_CHANGING_BASE_MODEL)
-            with gr.Column(scale=0.3):
-                change_base_model_button = gr.Button(
-                    'Change base model', interactive=ALLOW_CHANGING_BASE_MODEL)
-        if not ALLOW_CHANGING_BASE_MODEL:
-            gr.Markdown(
-                '''The base model is not allowed to be changed in this Space so as not to slow down the demo, but it can be changed if you duplicate the Space. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'''
-            )
-    gr.Markdown('''### Related Spaces
-- [Space using Anything-v4.0 as base model](https://huggingface.co/spaces/hysts/ControlNet-with-Anything-v4)
-- https://huggingface.co/spaces/jonigata/PoseMaker2
-- https://huggingface.co/spaces/diffusers/controlnet-openpose
-- https://huggingface.co/spaces/diffusers/controlnet-canny
-''')
-    check_base_model_button.click(fn=lambda: model.base_model_id,
-                                  outputs=current_base_model,
-                                  queue=False)
-    new_base_model_id.submit(fn=model.set_base_model,
-                             inputs=new_base_model_id,
-                             outputs=current_base_model)
-    change_base_model_button.click(fn=model.set_base_model,
-                                   inputs=new_base_model_id,
-                                   outputs=current_base_model)
-demo.queue(api_open=False, max_size=10).launch()

 from __future__ import annotations
 import os
 import shlex
 import subprocess
 import gradio as gr
 if os.getenv('SYSTEM') == 'spaces':
     with open('patch') as f:
         subprocess.run(shlex.split('patch -p1'), stdin=f, cwd='ControlNet')
+    commands = [
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/dpt_hybrid-midas-501f0c75.pt -O dpt_hybrid-midas-501f0c75.pt',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/body_pose_model.pth -O body_pose_model.pth',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/hand_pose_model.pth -O hand_pose_model.pth',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/mlsd_large_512_fp32.pth -O mlsd_large_512_fp32.pth',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/mlsd_tiny_512_fp32.pth -O mlsd_tiny_512_fp32.pth',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/network-bsds500.pth -O network-bsds500.pth',
+        'wget https://huggingface.co/ckpt/ControlNet/resolve/main/upernet_global_small.pth -O upernet_global_small.pth',
+    ]
+    for command in commands:
+        subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
+from gradio_canny2image import create_demo as create_demo_canny
+from gradio_depth2image import create_demo as create_demo_depth
+from gradio_fake_scribble2image import create_demo as create_demo_fake_scribble
+from gradio_hed2image import create_demo as create_demo_hed
+from gradio_hough2image import create_demo as create_demo_hough
+from gradio_normal2image import create_demo as create_demo_normal
+from gradio_pose2image import create_demo as create_demo_pose
+from gradio_scribble2image import create_demo as create_demo_scribble
+from gradio_scribble2image_interactive import \
     create_demo as create_demo_scribble_interactive
+from gradio_seg2image import create_demo as create_demo_seg
+from model import Model
+MAX_IMAGES = 1
+DESCRIPTION = '''# ControlNet
+This is an unofficial demo for [https://github.com/lllyasviel/ControlNet](https://github.com/lllyasviel/ControlNet).
+'''
+if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
+    DESCRIPTION += f'''<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.<br/>
+<a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">
+<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+<p/>
 '''
+model = Model()
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tabs():
         with gr.TabItem('Canny'):
+            create_demo_canny(model.process_canny, max_images=MAX_IMAGES)
         with gr.TabItem('Hough'):
+            create_demo_hough(model.process_hough, max_images=MAX_IMAGES)
         with gr.TabItem('HED'):
+            create_demo_hed(model.process_hed, max_images=MAX_IMAGES)
         with gr.TabItem('Scribble'):
+            create_demo_scribble(model.process_scribble, max_images=MAX_IMAGES)
         with gr.TabItem('Scribble Interactive'):
             create_demo_scribble_interactive(
+                model.process_scribble_interactive,  max_images=MAX_IMAGES)
         with gr.TabItem('Fake Scribble'):
+            create_demo_fake_scribble(model.process_fake_scribble, max_images=MAX_IMAGES)
         with gr.TabItem('Pose'):
+            create_demo_pose(model.process_pose, max_images=MAX_IMAGES)
         with gr.TabItem('Segmentation'):
+            create_demo_seg(model.process_seg, max_images=MAX_IMAGES)
         with gr.TabItem('Depth'):
+            create_demo_depth(model.process_depth, max_images=MAX_IMAGES)
         with gr.TabItem('Normal map'):
+            create_demo_normal(model.process_normal, max_images=MAX_IMAGES)
+demo.queue(api_open=False).launch()

app_canny.py → gradio_canny2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_canny2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
@@ -16,40 +16,39 @@ def create_demo(process, max_images=12, default_num_images=3):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
-                    canny_low_threshold = gr.Slider(
-                        label='Canny low threshold',
-                        minimum=1,
-                        maximum=255,
-                        value=100,
-                        step=1)
-                    canny_high_threshold = gr.Slider(
-                        label='Canny high threshold',
-                        minimum=1,
-                        maximum=255,
-                        value=200,
-                        step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -59,33 +58,17 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            canny_low_threshold,
-            canny_high_threshold,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='canny')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_canny)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_canny2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
+                    low_threshold = gr.Slider(label='Canny low threshold',
+                                              minimum=1,
+                                              maximum=255,
+                                              value=100,
+                                              step=1)
+                    high_threshold = gr.Slider(label='Canny high threshold',
+                                               minimum=1,
+                                               maximum=255,
+                                               value=200,
+                                               step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, ddim_steps, scale, seed, eta, low_threshold,
+            high_threshold
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='canny')
     return demo

app_depth.py → gradio_depth2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_depth2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Depth Maps')
@@ -13,38 +13,37 @@ def create_demo(process, max_images=12, default_num_images=3):
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
-                    is_depth_image = gr.Checkbox(label='Is depth image',
-                                                 value=False)
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Depth Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=384,
                                                   step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -54,33 +53,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            is_depth_image,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='depth')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_depth)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_depth2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Depth Maps')
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Depth Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=384,
                                                   step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='depth')
     return demo

app_fake_scribble.py → gradio_fake_scribble2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_fake_scribble2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
@@ -16,33 +16,34 @@ def create_demo(process, max_images=12, default_num_images=3):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='HED Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=512,
                                                   step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -52,32 +53,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='fake_scribble')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_fake_scribble)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_fake_scribble2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='HED Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=512,
                                                   step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='fake_scribble')
     return demo

app_hed.py → gradio_hed2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hed2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with HED Maps')
@@ -16,33 +16,34 @@ def create_demo(process, max_images=12, default_num_images=3):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='HED Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=512,
                                                   step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -52,32 +53,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='hed')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_hed)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hed2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with HED Maps')
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='HED Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=512,
                                                   step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='hed')
     return demo

app_hough.py → gradio_hough2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hough2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
@@ -16,45 +16,46 @@ def create_demo(process, max_images=12, default_num_images=3):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Hough Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=512,
                                                   step=1)
-                    mlsd_value_threshold = gr.Slider(
                         label='Hough value threshold (MLSD)',
                         minimum=0.01,
                         maximum=2.0,
                         value=0.1,
                         step=0.01)
-                    mlsd_distance_threshold = gr.Slider(
                         label='Hough distance threshold (MLSD)',
                         minimum=0.01,
                         maximum=20.0,
                         value=0.1,
                         step=0.01)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -64,34 +65,17 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            mlsd_value_threshold,
-            mlsd_distance_threshold,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='hough')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_hough)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hough2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Hough Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=512,
                                                   step=1)
+                    value_threshold = gr.Slider(
                         label='Hough value threshold (MLSD)',
                         minimum=0.01,
                         maximum=2.0,
                         value=0.1,
                         step=0.01)
+                    distance_threshold = gr.Slider(
                         label='Hough distance threshold (MLSD)',
                         minimum=0.01,
                         maximum=20.0,
                         value=0.1,
                         step=0.01)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
+            value_threshold, distance_threshold
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='hough')
     return demo

app_normal.py → gradio_normal2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_normal2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Normal Maps')
@@ -13,21 +13,19 @@ def create_demo(process, max_images=12, default_num_images=3):
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
-                    is_normal_image = gr.Checkbox(label='Is normal image',
-                                                  value=False)
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Normal Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=384,
                                                   step=1)
                     bg_threshold = gr.Slider(
@@ -36,21 +34,22 @@ def create_demo(process, max_images=12, default_num_images=3):
                         maximum=1.0,
                         value=0.4,
                         step=0.01)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -60,34 +59,17 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            bg_threshold,
-            is_normal_image,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='normal')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_normal)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_normal2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Normal Maps')
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='Normal Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=384,
                                                   step=1)
                     bg_threshold = gr.Slider(
                         maximum=1.0,
                         value=0.4,
                         step=0.01)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
+            bg_threshold
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='normal')
     return demo

app_pose.py → gradio_pose2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_pose2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Human Pose')
@@ -13,41 +13,37 @@ def create_demo(process, max_images=12, default_num_images=3):
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
-                    is_pose_image = gr.Checkbox(label='Is pose image',
-                                                value=False)
-                    gr.Markdown(
-                        'You can use [PoseMaker2](https://huggingface.co/spaces/jonigata/PoseMaker2) to create pose images.'
-                    )
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='OpenPose Resolution',
                                                   minimum=128,
-                                                  maximum=512,
                                                   value=512,
                                                   step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -57,33 +53,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            is_pose_image,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='pose')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_pose)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_pose2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Human Pose')
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(label='OpenPose Resolution',
                                                   minimum=128,
+                                                  maximum=1024,
                                                   value=512,
                                                   step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='pose')
     return demo

app_scribble.py → gradio_scribble2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Scribble Maps')
@@ -16,28 +16,29 @@ def create_demo(process, max_images=12, default_num_images=3):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -47,31 +48,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='scribble')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_scribble)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Scribble Maps')
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='scribble')
     return demo

app_scribble_interactive.py → gradio_scribble2image_interactive.py RENAMED Viewed

@@ -1,5 +1,5 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image_interactive.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
 import numpy as np
@@ -8,7 +8,7 @@ def create_canvas(w, h):
     return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown(
@@ -17,12 +17,12 @@ def create_demo(process, max_images=12, default_num_images=3):
             with gr.Column():
                 canvas_width = gr.Slider(label='Canvas Width',
                                          minimum=256,
-                                         maximum=512,
                                          value=512,
                                          step=1)
                 canvas_height = gr.Slider(label='Canvas Height',
                                           minimum=256,
-                                          maximum=512,
                                           value=512,
                                           step=1)
                 create_button = gr.Button(label='Start',
@@ -37,36 +37,36 @@ def create_demo(process, max_images=12, default_num_images=3):
                 )
                 create_button.click(fn=create_canvas,
                                     inputs=[canvas_width, canvas_height],
-                                    outputs=input_image,
-                                    queue=False)
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -76,28 +76,13 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
-        run_button.click(fn=process, inputs=inputs, outputs=result)
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_scribble_interactive)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image_interactive.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
 import numpy as np
     return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown(
             with gr.Column():
                 canvas_width = gr.Slider(label='Canvas Width',
                                          minimum=256,
+                                         maximum=1024,
                                          value=512,
                                          step=1)
                 canvas_height = gr.Slider(label='Canvas Height',
                                           minimum=256,
+                                          maximum=1024,
                                           value=512,
                                           step=1)
                 create_button = gr.Button(label='Start',
                 )
                 create_button.click(fn=create_canvas,
                                     inputs=[canvas_width, canvas_height],
+                                    outputs=[input_image])
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, ddim_steps, scale, seed, eta
         ]
+        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
     return demo

app_seg.py → gradio_seg2image.py RENAMED Viewed

@@ -1,9 +1,9 @@
 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_seg2image.py
-# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
-def create_demo(process, max_images=12, default_num_images=3):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
@@ -13,39 +13,38 @@ def create_demo(process, max_images=12, default_num_images=3):
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
-                    is_segmentation_map = gr.Checkbox(
-                        label='Is segmentation map', value=False)
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
-                                            value=default_num_images,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
-                                                 maximum=512,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(
                         label='Segmentation Resolution',
                         minimum=128,
-                        maximum=512,
                         value=512,
                         step=1)
-                    num_steps = gr.Slider(label='Steps',
-                                          minimum=1,
-                                          maximum=100,
-                                          value=20,
-                                          step=1)
-                    guidance_scale = gr.Slider(label='Guidance Scale',
-                                               minimum=0.1,
-                                               maximum=30.0,
-                                               value=9.0,
-                                               step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
@@ -55,33 +54,16 @@ def create_demo(process, max_images=12, default_num_images=3):
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
-                result = gr.Gallery(label='Output',
-                                    show_label=False,
-                                    elem_id='gallery').style(grid=2,
-                                                             height='auto')
-        inputs = [
-            input_image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            detect_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            is_segmentation_map,
         ]
-        prompt.submit(fn=process, inputs=inputs, outputs=result)
         run_button.click(fn=process,
-                         inputs=inputs,
-                         outputs=result,
                          api_name='seg')
     return demo
-if __name__ == '__main__':
-    from model import Model
-    model = Model()
-    demo = create_demo(model.process_seg)
-    demo.queue().launch()

 # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_seg2image.py
+# The original license file is LICENSE.ControlNet in this repo.
 import gradio as gr
+def create_demo(process, max_images=12):
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
                 prompt = gr.Textbox(label='Prompt')
                 run_button = gr.Button(label='Run')
                 with gr.Accordion('Advanced options', open=False):
                     num_samples = gr.Slider(label='Images',
                                             minimum=1,
                                             maximum=max_images,
+                                            value=1,
                                             step=1)
                     image_resolution = gr.Slider(label='Image Resolution',
                                                  minimum=256,
+                                                 maximum=768,
                                                  value=512,
                                                  step=256)
                     detect_resolution = gr.Slider(
                         label='Segmentation Resolution',
                         minimum=128,
+                        maximum=1024,
                         value=512,
                         step=1)
+                    ddim_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                    scale = gr.Slider(label='Guidance Scale',
+                                      minimum=0.1,
+                                      maximum=30.0,
+                                      value=9.0,
+                                      step=0.1)
                     seed = gr.Slider(label='Seed',
                                      minimum=-1,
                                      maximum=2147483647,
                                      step=1,
                                      randomize=True)
+                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                     a_prompt = gr.Textbox(
                         label='Added Prompt',
                         value='best quality, extremely detailed')
                         'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                     )
             with gr.Column():
+                result_gallery = gr.Gallery(label='Output',
+                                            show_label=False,
+                                            elem_id='gallery').style(
+                                                grid=2, height='auto')
+        ips = [
+            input_image, prompt, a_prompt, n_prompt, num_samples,
+            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
         ]
         run_button.click(fn=process,
+                         inputs=ips,
+                         outputs=[result_gallery],
                          api_name='seg')
     return demo

model.py CHANGED Viewed

@@ -1,649 +1,725 @@
 # This file is adapted from gradio_*.py in https://github.com/lllyasviel/ControlNet/tree/f4748e3630d8141d7765e2bd9b1e348f47847707
-# The original license file is LICENSE.ControlNet in this repo.
 from __future__ import annotations
-import gc
 import pathlib
 import sys
 import cv2
 import numpy as np
-import PIL.Image
 import torch
-from diffusers import (ControlNetModel, DiffusionPipeline,
-                       StableDiffusionControlNetPipeline,
-                       UniPCMultistepScheduler)
-repo_dir = pathlib.Path(__file__).parent
-submodule_dir = repo_dir / 'ControlNet'
-sys.path.append(submodule_dir.as_posix())
-try:
-    from annotator.canny import apply_canny
-    from annotator.hed import apply_hed, nms
-    from annotator.midas import apply_midas
-    from annotator.mlsd import apply_mlsd
-    from annotator.openpose import apply_openpose
-    from annotator.uniformer import apply_uniformer
-    from annotator.util import HWC3, resize_image
-except Exception:
-    pass
-CONTROLNET_MODEL_IDS = {
-    'canny': 'lllyasviel/sd-controlnet-canny',
-    'hough': 'lllyasviel/sd-controlnet-mlsd',
-    'hed': 'lllyasviel/sd-controlnet-hed',
-    'scribble': 'lllyasviel/sd-controlnet-scribble',
-    'pose': 'lllyasviel/sd-controlnet-openpose',
-    'seg': 'lllyasviel/sd-controlnet-seg',
-    'depth': 'lllyasviel/sd-controlnet-depth',
-    'normal': 'lllyasviel/sd-controlnet-normal',
-}
-def download_all_controlnet_weights() -> None:
-    for model_id in CONTROLNET_MODEL_IDS.values():
-        ControlNetModel.from_pretrained(model_id)
 class Model:
     def __init__(self,
-                 base_model_id: str = 'runwayml/stable-diffusion-v1-5',
-                 task_name: str = 'canny'):
         self.device = torch.device(
             'cuda:0' if torch.cuda.is_available() else 'cpu')
-        self.base_model_id = ''
         self.task_name = ''
-        self.pipe = self.load_pipe(base_model_id, task_name)
-    def load_pipe(self, base_model_id: str, task_name) -> DiffusionPipeline:
-        if self.device.type == 'cpu':
-            return None
-        if base_model_id == self.base_model_id and task_name == self.task_name and hasattr(
-                self, 'pipe'):
-            return self.pipe
-        model_id = CONTROLNET_MODEL_IDS[task_name]
-        controlnet = ControlNetModel.from_pretrained(model_id,
-                                                     torch_dtype=torch.float16)
-        pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            base_model_id,
-            safety_checker=None,
-            controlnet=controlnet,
-            torch_dtype=torch.float16)
-        pipe.scheduler = UniPCMultistepScheduler.from_config(
-            pipe.scheduler.config)
-        pipe.enable_xformers_memory_efficient_attention()
-        pipe.to(self.device)
-        torch.cuda.empty_cache()
-        gc.collect()
-        self.base_model_id = base_model_id
-        self.task_name = task_name
-        return pipe
-    def set_base_model(self, base_model_id: str) -> str:
-        if not base_model_id or base_model_id == self.base_model_id:
-            return self.base_model_id
-        del self.pipe
-        torch.cuda.empty_cache()
-        gc.collect()
-        try:
-            self.pipe = self.load_pipe(base_model_id, self.task_name)
-        except Exception:
-            self.pipe = self.load_pipe(self.base_model_id, self.task_name)
-        return self.base_model_id
-    def load_controlnet_weight(self, task_name: str) -> None:
         if task_name == self.task_name:
             return
-        if 'controlnet' in self.pipe.__dict__:
-            del self.pipe.controlnet
-        torch.cuda.empty_cache()
-        gc.collect()
-        model_id = CONTROLNET_MODEL_IDS[task_name]
-        controlnet = ControlNetModel.from_pretrained(model_id,
-                                                     torch_dtype=torch.float16)
-        controlnet.to(self.device)
-        torch.cuda.empty_cache()
-        gc.collect()
-        self.pipe.controlnet = controlnet
         self.task_name = task_name
-    def get_prompt(self, prompt: str, additional_prompt: str) -> str:
-        if not prompt:
-            prompt = additional_prompt
-        else:
-            prompt = f'{prompt}, {additional_prompt}'
-        return prompt
-    @torch.autocast('cuda')
-    def run_pipe(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        control_image: PIL.Image.Image,
-        num_images: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
         if seed == -1:
-            seed = np.random.randint(0, np.iinfo(np.int64).max)
-        generator = torch.Generator().manual_seed(seed)
-        return self.pipe(prompt=prompt,
-                         negative_prompt=negative_prompt,
-                         guidance_scale=guidance_scale,
-                         num_images_per_prompt=num_images,
-                         num_inference_steps=num_steps,
-                         generator=generator,
-                         image=control_image).images
-    @staticmethod
-    def preprocess_canny(
-        input_image: np.ndarray,
-        image_resolution: int,
-        low_threshold: int,
-        high_threshold: int,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
-        image = resize_image(HWC3(input_image), image_resolution)
-        control_image = apply_canny(image, low_threshold, high_threshold)
-        control_image = HWC3(control_image)
-        vis_control_image = 255 - control_image
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            vis_control_image)
     @torch.inference_mode()
-    def process_canny(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        low_threshold: int,
-        high_threshold: int,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_canny(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            low_threshold=low_threshold,
-            high_threshold=high_threshold,
-        )
-        self.load_controlnet_weight('canny')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_hough(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-        value_threshold: float,
-        distance_threshold: float,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        control_image = apply_mlsd(
-            resize_image(input_image, detect_resolution), value_threshold,
-            distance_threshold)
-        control_image = HWC3(control_image)
-        image = resize_image(input_image, image_resolution)
-        H, W = image.shape[:2]
-        control_image = cv2.resize(control_image, (W, H),
-                                   interpolation=cv2.INTER_NEAREST)
-        vis_control_image = 255 - cv2.dilate(
-            control_image, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            vis_control_image)
     @torch.inference_mode()
-    def process_hough(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        value_threshold: float,
-        distance_threshold: float,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_hough(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-            value_threshold=value_threshold,
-            distance_threshold=distance_threshold,
-        )
-        self.load_controlnet_weight('hough')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_hed(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        control_image = apply_hed(resize_image(input_image, detect_resolution))
-        control_image = HWC3(control_image)
-        image = resize_image(input_image, image_resolution)
-        H, W = image.shape[:2]
-        control_image = cv2.resize(control_image, (W, H),
-                                   interpolation=cv2.INTER_LINEAR)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            control_image)
-    @torch.inference_mode()
-    def process_hed(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_hed(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-        )
-        self.load_controlnet_weight('hed')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_scribble(
-        input_image: np.ndarray,
-        image_resolution: int,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
-        image = resize_image(HWC3(input_image), image_resolution)
-        control_image = np.zeros_like(image, dtype=np.uint8)
-        control_image[np.min(image, axis=2) < 127] = 255
-        vis_control_image = 255 - control_image
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            vis_control_image)
     @torch.inference_mode()
-    def process_scribble(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_scribble(
-            input_image=input_image,
-            image_resolution=image_resolution,
-        )
-        self.load_controlnet_weight('scribble')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_scribble_interactive(
-        input_image: np.ndarray,
-        image_resolution: int,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
-        image = resize_image(HWC3(input_image['mask'][:, :, 0]),
-                             image_resolution)
-        control_image = np.zeros_like(image, dtype=np.uint8)
-        control_image[np.min(image, axis=2) > 127] = 255
-        vis_control_image = 255 - control_image
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            vis_control_image)
     @torch.inference_mode()
-    def process_scribble_interactive(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_scribble_interactive(
-            input_image=input_image,
-            image_resolution=image_resolution,
-        )
-        self.load_controlnet_weight('scribble')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_fake_scribble(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
-        input_image = HWC3(input_image)
-        control_image = apply_hed(resize_image(input_image, detect_resolution))
-        control_image = HWC3(control_image)
-        image = resize_image(input_image, image_resolution)
-        H, W = image.shape[:2]
-        control_image = cv2.resize(control_image, (W, H),
-                                   interpolation=cv2.INTER_LINEAR)
-        control_image = nms(control_image, 127, 3.0)
-        control_image = cv2.GaussianBlur(control_image, (0, 0), 3.0)
-        control_image[control_image > 4] = 255
-        control_image[control_image < 255] = 0
-        vis_control_image = 255 - control_image
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            vis_control_image)
     @torch.inference_mode()
-    def process_fake_scribble(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_fake_scribble(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-        )
-        self.load_controlnet_weight('scribble')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_pose(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-        is_pose_image: bool,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        if not is_pose_image:
-            control_image, _ = apply_openpose(
-                resize_image(input_image, detect_resolution))
-            control_image = HWC3(control_image)
-            image = resize_image(input_image, image_resolution)
-            H, W = image.shape[:2]
-            control_image = cv2.resize(control_image, (W, H),
-                                       interpolation=cv2.INTER_NEAREST)
-        else:
-            control_image = resize_image(input_image, image_resolution)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            control_image)
     @torch.inference_mode()
-    def process_pose(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        is_pose_image: bool,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_pose(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-            is_pose_image=is_pose_image,
-        )
-        self.load_controlnet_weight('pose')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_seg(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-        is_segmentation_map: bool,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        if not is_segmentation_map:
-            control_image = apply_uniformer(
-                resize_image(input_image, detect_resolution))
-            image = resize_image(input_image, image_resolution)
-            H, W = image.shape[:2]
-            control_image = cv2.resize(control_image, (W, H),
-                                       interpolation=cv2.INTER_NEAREST)
-        else:
-            control_image = resize_image(input_image, image_resolution)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            control_image)
     @torch.inference_mode()
-    def process_seg(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        is_segmentation_map: bool,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_seg(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-            is_segmentation_map=is_segmentation_map,
-        )
-        self.load_controlnet_weight('seg')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_depth(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-        is_depth_image: bool,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        if not is_depth_image:
-            control_image, _ = apply_midas(
-                resize_image(input_image, detect_resolution))
-            control_image = HWC3(control_image)
-            image = resize_image(input_image, image_resolution)
-            H, W = image.shape[:2]
-            control_image = cv2.resize(control_image, (W, H),
-                                       interpolation=cv2.INTER_LINEAR)
-        else:
-            control_image = resize_image(input_image, image_resolution)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            control_image)
     @torch.inference_mode()
-    def process_depth(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        is_depth_image: bool,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_depth(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-            is_depth_image=is_depth_image,
-        )
-        self.load_controlnet_weight('depth')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results
-    @staticmethod
-    def preprocess_normal(
-        input_image: np.ndarray,
-        image_resolution: int,
-        detect_resolution: int,
-        bg_threshold: float,
-        is_normal_image: bool,
-    ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
         input_image = HWC3(input_image)
-        if not is_normal_image:
-            _, control_image = apply_midas(resize_image(
-                input_image, detect_resolution),
-                                           bg_th=bg_threshold)
-            control_image = HWC3(control_image)
-            image = resize_image(input_image, image_resolution)
-            H, W = image.shape[:2]
-            control_image = cv2.resize(control_image, (W, H),
-                                       interpolation=cv2.INTER_LINEAR)
-        else:
-            control_image = resize_image(input_image, image_resolution)
-        return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
-            control_image)
     @torch.inference_mode()
-    def process_normal(
-        self,
-        input_image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        detect_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        bg_threshold: float,
-        is_normal_image: bool,
-    ) -> list[PIL.Image.Image]:
-        control_image, vis_control_image = self.preprocess_normal(
-            input_image=input_image,
-            image_resolution=image_resolution,
-            detect_resolution=detect_resolution,
-            bg_threshold=bg_threshold,
-            is_normal_image=is_normal_image,
-        )
-        self.load_controlnet_weight('normal')
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [vis_control_image] + results

 # This file is adapted from gradio_*.py in https://github.com/lllyasviel/ControlNet/tree/f4748e3630d8141d7765e2bd9b1e348f47847707
+# The original license file is LICENSE.ControlNet in this repo.
 from __future__ import annotations
 import pathlib
+import random
+import shlex
+import subprocess
 import sys
 import cv2
+import einops
 import numpy as np
 import torch
+from pytorch_lightning import seed_everything
+sys.path.append('ControlNet')
+import config
+from annotator.canny import apply_canny
+from annotator.hed import apply_hed, nms
+from annotator.midas import apply_midas
+from annotator.mlsd import apply_mlsd
+from annotator.openpose import apply_openpose
+from annotator.uniformer import apply_uniformer
+from annotator.util import HWC3, resize_image
+from cldm.model import create_model, load_state_dict
+from ldm.models.diffusion.ddim import DDIMSampler
+from share import *
 class Model:
+    WEIGHT_NAMES = {
+        'canny': 'control_sd15_canny.pth',
+        'hough': 'control_sd15_mlsd.pth',
+        'hed': 'control_sd15_hed.pth',
+        'scribble': 'control_sd15_scribble.pth',
+        'pose': 'control_sd15_openpose.pth',
+        'seg': 'control_sd15_seg.pth',
+        'depth': 'control_sd15_depth.pth',
+        'normal': 'control_sd15_normal.pth',
+    }
     def __init__(self,
+                 model_config_path: str = 'ControlNet/models/cldm_v15.yaml',
+                 model_dir: str = 'models'):
         self.device = torch.device(
             'cuda:0' if torch.cuda.is_available() else 'cpu')
+        self.model = create_model(model_config_path).to(self.device)
+        self.ddim_sampler = DDIMSampler(self.model)
         self.task_name = ''
+        self.model_dir = pathlib.Path(model_dir)
+        self.download_models()
+    def load_weight(self, task_name: str) -> None:
         if task_name == self.task_name:
             return
+        weight_path = self.get_weight_path(task_name)
+        self.model.load_state_dict(
+            load_state_dict(weight_path, location=self.device))
         self.task_name = task_name
+    def get_weight_path(self, task_name: str) -> str:
+        if 'scribble' in task_name:
+            task_name = 'scribble'
+        return f'{self.model_dir}/{self.WEIGHT_NAMES[task_name]}'
+    def download_models(self):
+        self.model_dir.mkdir(exist_ok=True, parents=True)
+        for name in self.WEIGHT_NAMES.values():
+            out_path = self.model_dir / name
+            if out_path.exists():
+                continue
+            subprocess.run(
+                shlex.split(
+                    f'wget https://huggingface.co/ckpt/ControlNet/resolve/main/{name} -O {out_path}'
+                ))
+    @torch.inference_mode()
+    def process_canny(self, input_image, prompt, a_prompt, n_prompt,
+                      num_samples, image_resolution, ddim_steps, scale, seed,
+                      eta, low_threshold, high_threshold):
+        self.load_weight('canny')
+        img = resize_image(HWC3(input_image), image_resolution)
+        H, W, C = img.shape
+        detected_map = apply_canny(img, low_threshold, high_threshold)
+        detected_map = HWC3(detected_map)
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
         if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [255 - detected_map] + results
     @torch.inference_mode()
+    def process_hough(self, input_image, prompt, a_prompt, n_prompt,
+                      num_samples, image_resolution, detect_resolution,
+                      ddim_steps, scale, seed, eta, value_threshold,
+                      distance_threshold):
+        self.load_weight('hough')
         input_image = HWC3(input_image)
+        detected_map = apply_mlsd(resize_image(input_image, detect_resolution),
+                                  value_threshold, distance_threshold)
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_NEAREST)
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [
+            255 - cv2.dilate(detected_map,
+                             np.ones(shape=(3, 3), dtype=np.uint8),
+                             iterations=1)
+        ] + results
     @torch.inference_mode()
+    def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples,
+                    image_resolution, detect_resolution, ddim_steps, scale,
+                    seed, eta):
+        """Sample images conditioned on the `apply_hed` map of the input.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the resized detected map, followed
+            by `num_samples` uint8 images.
+        """
+        self.load_weight('hed')
         input_image = HWC3(input_image)
+        detected_map = apply_hed(resize_image(input_image, detect_resolution))
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Bring the detector output to the same H x W as the resized input.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_LINEAR)
+        # Divide by 255 (assumes 0-255 pixel values), repeat the map once per
+        # sample, then move channels first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [detected_map] + results
     @torch.inference_mode()
+    def process_scribble(self, input_image, prompt, a_prompt, n_prompt,
+                         num_samples, image_resolution, ddim_steps, scale,
+                         seed, eta):
+        """Sample images conditioned on a thresholded scribble image.
+
+        The control map is built directly from the resized input: pixels
+        whose minimum channel value is below 127 are set to 255, all others
+        stay 0. No detector is run.
+
+        Args:
+            input_image: Scribble image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image`.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the inverted control map
+            (`255 - detected_map`), followed by `num_samples` uint8 images.
+        """
+        self.load_weight('scribble')
+        img = resize_image(HWC3(input_image), image_resolution)
+        H, W, C = img.shape
+        # Binary control map: 255 where the darkest channel is below 127.
+        detected_map = np.zeros_like(img, dtype=np.uint8)
+        detected_map[np.min(img, axis=2) < 127] = 255
+        # Divide by 255, repeat the map once per sample, then move channels
+        # first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        # The first item is inverted relative to the map fed to the model.
+        return [255 - detected_map] + results
     @torch.inference_mode()
+    def process_scribble_interactive(self, input_image, prompt, a_prompt,
+                                     n_prompt, num_samples, image_resolution,
+                                     ddim_steps, scale, seed, eta):
+        """Sample images conditioned on a scribble drawn as a mask.
+
+        Only channel 0 of `input_image['mask']` is used. Pixels whose value
+        is above 127 after resizing are set to 255 in the control map, all
+        others stay 0.
+
+        Args:
+            input_image: Mapping with a 'mask' entry holding an H x W x C
+                array (presumably the sketch-tool output of the UI -
+                verify against the caller).
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image`.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the inverted control map
+            (`255 - detected_map`), followed by `num_samples` uint8 images.
+        """
+        self.load_weight('scribble')
+        img = resize_image(HWC3(input_image['mask'][:, :, 0]),
+                           image_resolution)
+        H, W, C = img.shape
+        # Binary control map: 255 where the mask value is above 127. Note
+        # the comparison is the opposite of the one in process_scribble.
+        detected_map = np.zeros_like(img, dtype=np.uint8)
+        detected_map[np.min(img, axis=2) > 127] = 255
+        # Divide by 255, repeat the map once per sample, then move channels
+        # first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        # The first item is inverted relative to the map fed to the model.
+        return [255 - detected_map] + results
     @torch.inference_mode()
+    def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt,
+                              num_samples, image_resolution, detect_resolution,
+                              ddim_steps, scale, seed, eta):
+        """Sample images from a scribble-like map derived from `apply_hed`.
+
+        The `apply_hed` output is resized, passed through `nms`, blurred and
+        binarized, then used as the control map with the 'scribble' weight.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the inverted control map
+            (`255 - detected_map`), followed by `num_samples` uint8 images.
+        """
+        self.load_weight('scribble')
         input_image = HWC3(input_image)
+        detected_map = apply_hed(resize_image(input_image, detect_resolution))
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Bring the detector output to the same H x W as the resized input.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_LINEAR)
+        # NOTE(review): nms is defined elsewhere; presumably it thins the
+        # map (non-maximum suppression) - confirm its argument meaning.
+        detected_map = nms(detected_map, 127, 3.0)
+        detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
+        # Binarize: values above 4 become 255, everything else becomes 0.
+        detected_map[detected_map > 4] = 255
+        detected_map[detected_map < 255] = 0
+        # Divide by 255, repeat the map once per sample, then move channels
+        # first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        # The first item is inverted relative to the map fed to the model.
+        return [255 - detected_map] + results
     @torch.inference_mode()
+    def process_pose(self, input_image, prompt, a_prompt, n_prompt,
+                     num_samples, image_resolution, detect_resolution,
+                     ddim_steps, scale, seed, eta):
+        """Sample images conditioned on the `apply_openpose` map of the input.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the resized detected map, followed
+            by `num_samples` uint8 images.
+        """
+        self.load_weight('pose')
         input_image = HWC3(input_image)
+        # apply_openpose returns a pair; only the first element is used.
+        detected_map, _ = apply_openpose(
+            resize_image(input_image, detect_resolution))
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Nearest-neighbour resize, so no new pixel values are introduced.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_NEAREST)
+        # Divide by 255 (assumes 0-255 pixel values), repeat the map once per
+        # sample, then move channels first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [detected_map] + results
     @torch.inference_mode()
+    def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples,
+                    image_resolution, detect_resolution, ddim_steps, scale,
+                    seed, eta):
+        """Sample images conditioned on the `apply_uniformer` map of the input.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the resized detected map, followed
+            by `num_samples` uint8 images.
+        """
+        self.load_weight('seg')
         input_image = HWC3(input_image)
+        # Unlike the sibling methods, the detector output is not passed
+        # through HWC3 here (presumably apply_uniformer already returns an
+        # H x W x 3 array - TODO confirm).
+        detected_map = apply_uniformer(
+            resize_image(input_image, detect_resolution))
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Nearest-neighbour resize, so no new pixel values are introduced.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_NEAREST)
+        # Divide by 255 (assumes 0-255 pixel values), repeat the map once per
+        # sample, then move channels first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [detected_map] + results
     @torch.inference_mode()
+    def process_depth(self, input_image, prompt, a_prompt, n_prompt,
+                      num_samples, image_resolution, detect_resolution,
+                      ddim_steps, scale, seed, eta):
+        """Sample images conditioned on the first output of `apply_midas`.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+
+        Returns:
+            A list whose first item is the resized detected map, followed
+            by `num_samples` uint8 images.
+        """
+        self.load_weight('depth')
         input_image = HWC3(input_image)
+        # apply_midas returns a pair; the first element is used here
+        # (process_normal uses the second).
+        detected_map, _ = apply_midas(
+            resize_image(input_image, detect_resolution))
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Bring the detector output to the same H x W as the resized input.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_LINEAR)
+        # Divide by 255 (assumes 0-255 pixel values), repeat the map once per
+        # sample, then move channels first.
+        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [detected_map] + results
     @torch.inference_mode()
+    def process_normal(self, input_image, prompt, a_prompt, n_prompt,
+                       num_samples, image_resolution, detect_resolution,
+                       ddim_steps, scale, seed, eta, bg_threshold):
+        """Sample images conditioned on the second output of `apply_midas`.
+
+        Args:
+            input_image: Source image array; passed through `HWC3` first.
+            prompt: Main text prompt.
+            a_prompt: Extra text joined to `prompt` with ', '.
+            n_prompt: Prompt used for the unconditional conditioning.
+            num_samples: Number of images sampled in one batch.
+            image_resolution: Resolution given to `resize_image` for the
+                generation size.
+            detect_resolution: Resolution given to `resize_image` for the
+                detector input.
+            ddim_steps: Number of steps passed to the DDIM sampler.
+            scale: Unconditional guidance scale.
+            seed: Seed for `seed_everything`; -1 draws one from [0, 65535].
+            eta: `eta` value passed to the DDIM sampler.
+            bg_threshold: Forwarded to `apply_midas` as `bg_th`.
+
+        Returns:
+            A list whose first item is the resized detected map (in its
+            original channel order), followed by `num_samples` uint8 images.
+        """
+        self.load_weight('normal')
+        input_image = HWC3(input_image)
+        # apply_midas returns a pair; the second element is used here
+        # (process_depth uses the first).
+        _, detected_map = apply_midas(resize_image(input_image,
+                                                   detect_resolution),
+                                      bg_th=bg_threshold)
+        detected_map = HWC3(detected_map)
+        img = resize_image(input_image, image_resolution)
+        H, W, C = img.shape
+        # Bring the detector output to the same H x W as the resized input.
+        detected_map = cv2.resize(detected_map, (W, H),
+                                  interpolation=cv2.INTER_LINEAR)
+        # The channel axis is reversed for the model input only; the map
+        # returned to the caller keeps its original channel order.
+        control = torch.from_numpy(
+            detected_map[:, :, ::-1].copy()).float().cuda() / 255.0
+        control = torch.stack([control for _ in range(num_samples)], dim=0)
+        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+        # seed == -1 means "pick a random seed".
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+        # NOTE(review): low_vram_shift is defined elsewhere; presumably it
+        # moves sub-models on/off the GPU around prompt encoding vs.
+        # diffusion - confirm in the model class.
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        cond = {
+            'c_concat': [control],
+            'c_crossattn': [
+                self.model.get_learned_conditioning(
+                    [prompt + ', ' + a_prompt] * num_samples)
+            ]
+        }
+        un_cond = {
+            'c_concat': [control],
+            'c_crossattn':
+            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
+        }
+        # Sampler shape: 4 channels at 1/8 of the image height and width.
+        shape = (4, H // 8, W // 8)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=True)
+        samples, intermediates = self.ddim_sampler.sample(
+            ddim_steps,
+            num_samples,
+            shape,
+            cond,
+            verbose=False,
+            eta=eta,
+            unconditional_guidance_scale=scale,
+            unconditional_conditioning=un_cond)
+        if config.save_memory:
+            self.model.low_vram_shift(is_diffusing=False)
+        x_samples = self.model.decode_first_stage(samples)
+        # Apply x * 127.5 + 127.5, clip to [0, 255] and cast to uint8 with
+        # channels last (assumes decoder output is roughly in [-1, 1] -
+        # TODO confirm).
+        x_samples = (
+            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
+            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
+        results = [x_samples[i] for i in range(num_samples)]
+        return [detected_map] + results

notebooks/notebook.ipynb DELETED Viewed

@@ -1,80 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "8CnkIPtjn8Dc"
-   },
-   "outputs": [],
-   "source": [
-    "!git clone --recursive https://huggingface.co/spaces/hysts/ControlNet"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "IZlaYNTWoFPK"
-   },
-   "outputs": [],
-   "source": [
-    "%cd ControlNet"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "0zhLFnZUoWdp"
-   },
-   "outputs": [],
-   "source": [
-    "!cd ControlNet && git apply ../patch && cd .."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "P_fzYrLvoIcI"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install -q -r requirements.txt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "GOfGng5Woktd"
-   },
-   "outputs": [],
-   "source": [
-    "import app"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "7Cued230ol7T"
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "provenance": []
-  },
-  "gpuClass": "standard",
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}

patch CHANGED Viewed

@@ -113,16 +113,3 @@ index 500e53c..4061dbe 100644
  model = init_segmentor(config_file, checkpoint_file).cuda()
-diff --git a/annotator/util.py b/annotator/util.py
-index 7cde937..10a6d58 100644
---- a/annotator/util.py
-+++ b/annotator/util.py
-@@ -25,7 +25,7 @@ def resize_image(input_image, resolution):
-     H, W, C = input_image.shape
-     H = float(H)
-     W = float(W)
--    k = float(resolution) / min(H, W)
-+    k = float(resolution) / max(H, W)
-     H *= k
-     W *= k
-     H = int(np.round(H / 64.0)) * 64


113	model = init_segmentor(config_file, checkpoint_file).cuda()
114
115

requirements.txt CHANGED Viewed

@@ -1,9 +1,7 @@
 addict==2.4.0
 albumentations==1.3.0
 einops==0.6.0
-git+https://github.com/huggingface/accelerate@78151f8
-git+https://github.com/huggingface/diffusers@fa6d52d
-gradio==3.36.1
 imageio==2.25.0
 imageio-ffmpeg==0.4.8
 kornia==0.6.9
@@ -13,10 +11,8 @@ opencv-contrib-python==4.7.0.68
 opencv-python-headless==4.7.0.68
 prettytable==3.6.0
 pytorch-lightning==1.9.0
-safetensors==0.2.8
 timm==0.6.12
 torch==1.13.1
 torchvision==0.14.1
 transformers==4.26.1
-xformers==0.0.16
 yapf==0.32.0

 addict==2.4.0
 albumentations==1.3.0
 einops==0.6.0
+gradio==3.18.0
 imageio==2.25.0
 imageio-ffmpeg==0.4.8
 kornia==0.6.9
 opencv-python-headless==4.7.0.68
 prettytable==3.6.0
 pytorch-lightning==1.9.0
 timm==0.6.12
 torch==1.13.1
 torchvision==0.14.1
 transformers==4.26.1
 yapf==0.32.0

style.css CHANGED Viewed

@@ -1,8 +1,3 @@
 h1 {
   text-align: center;
 }
-.note {
-  text-align: center;
-  font-size: 150%;
-}

 h1 {
   text-align: center;
 }