hysts (HF staff) committed
Commit b44d4b1 • 1 parent: b7075f8

Update to the original Space

README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 😻
  colorFrom: pink
  colorTo: blue
  sdk: gradio
- sdk_version: 3.18.0
+ sdk_version: 3.20.0
  python_version: 3.10.9
  app_file: app.py
  pinned: false
app.py CHANGED
@@ -30,92 +30,117 @@ for name in names:
          continue
      subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')

- from gradio_canny2image import create_demo as create_demo_canny
- from gradio_depth2image import create_demo as create_demo_depth
- from gradio_fake_scribble2image import create_demo as create_demo_fake_scribble
- from gradio_hed2image import create_demo as create_demo_hed
- from gradio_hough2image import create_demo as create_demo_hough
- from gradio_normal2image import create_demo as create_demo_normal
- from gradio_pose2image import create_demo as create_demo_pose
- from gradio_scribble2image import create_demo as create_demo_scribble
- from gradio_scribble2image_interactive import \
+ from app_canny import create_demo as create_demo_canny
+ from app_depth import create_demo as create_demo_depth
+ from app_fake_scribble import create_demo as create_demo_fake_scribble
+ from app_hed import create_demo as create_demo_hed
+ from app_hough import create_demo as create_demo_hough
+ from app_normal import create_demo as create_demo_normal
+ from app_pose import create_demo as create_demo_pose
+ from app_scribble import create_demo as create_demo_scribble
+ from app_scribble_interactive import \
      create_demo as create_demo_scribble_interactive
- from gradio_seg2image import create_demo as create_demo_seg
- from model import (DEFAULT_BASE_MODEL_FILENAME, DEFAULT_BASE_MODEL_REPO,
-                    DEFAULT_BASE_MODEL_URL, Model)
+ from app_seg import create_demo as create_demo_seg
+ from model import Model, download_all_controlnet_weights

- MAX_IMAGES = 1
- DESCRIPTION = '''# [ControlNet](https://github.com/lllyasviel/ControlNet)
-
- This Space is a modified version of [this Space](https://huggingface.co/spaces/hysts/ControlNet).
- The original Space uses [Stable Diffusion v1.5](https://huggingface.co/runwayml/stable-diffusion-v1-5) as the base model, but [Anything v4.0](https://huggingface.co/andite/anything-v4.0) is used in this Space.
- '''
+ DESCRIPTION = '# [ControlNet](https://github.com/lllyasviel/ControlNet)'

  SPACE_ID = os.getenv('SPACE_ID')
- ALLOW_CHANGING_BASE_MODEL = SPACE_ID != 'hysts/ControlNet-with-other-models'
-
- if not ALLOW_CHANGING_BASE_MODEL:
-     DESCRIPTION += 'In this Space, the base model is not allowed to be changed so as not to slow down the demo, but it can be changed if you duplicate the Space.'
+ ALLOW_CHANGING_BASE_MODEL = SPACE_ID != 'hysts/ControlNet'

  if SPACE_ID is not None:
-     DESCRIPTION += f'''<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.<br/>
- <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">
- <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
- <p/>
+     DESCRIPTION += f'''<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>
  '''

- model = Model()
+ MAX_IMAGES = int(os.getenv('MAX_IMAGES', '3'))
+ DEFAULT_NUM_IMAGES = min(MAX_IMAGES, int(os.getenv('DEFAULT_NUM_IMAGES', '1')))
+
+ if os.getenv('SYSTEM') == 'spaces':
+     download_all_controlnet_weights()
+
+ DEFAULT_MODEL_ID = os.getenv('DEFAULT_MODEL_ID',
+                              'runwayml/stable-diffusion-v1-5')
+ model = Model(base_model_id=DEFAULT_MODEL_ID, task_name='canny')

  with gr.Blocks(css='style.css') as demo:
      gr.Markdown(DESCRIPTION)
-
      with gr.Tabs():
          with gr.TabItem('Canny'):
-             create_demo_canny(model.process_canny, max_images=MAX_IMAGES)
+             create_demo_canny(model.process_canny,
+                               max_images=MAX_IMAGES,
+                               default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Hough'):
-             create_demo_hough(model.process_hough, max_images=MAX_IMAGES)
+             create_demo_hough(model.process_hough,
+                               max_images=MAX_IMAGES,
+                               default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('HED'):
-             create_demo_hed(model.process_hed, max_images=MAX_IMAGES)
+             create_demo_hed(model.process_hed,
+                             max_images=MAX_IMAGES,
+                             default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Scribble'):
-             create_demo_scribble(model.process_scribble, max_images=MAX_IMAGES)
+             create_demo_scribble(model.process_scribble,
+                                  max_images=MAX_IMAGES,
+                                  default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Scribble Interactive'):
              create_demo_scribble_interactive(
-                 model.process_scribble_interactive, max_images=MAX_IMAGES)
+                 model.process_scribble_interactive,
+                 max_images=MAX_IMAGES,
+                 default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Fake Scribble'):
              create_demo_fake_scribble(model.process_fake_scribble,
-                                       max_images=MAX_IMAGES)
+                                       max_images=MAX_IMAGES,
+                                       default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Pose'):
-             create_demo_pose(model.process_pose, max_images=MAX_IMAGES)
+             create_demo_pose(model.process_pose,
+                              max_images=MAX_IMAGES,
+                              default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Segmentation'):
-             create_demo_seg(model.process_seg, max_images=MAX_IMAGES)
+             create_demo_seg(model.process_seg,
+                             max_images=MAX_IMAGES,
+                             default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Depth'):
-             create_demo_depth(model.process_depth, max_images=MAX_IMAGES)
+             create_demo_depth(model.process_depth,
+                               max_images=MAX_IMAGES,
+                               default_num_images=DEFAULT_NUM_IMAGES)
          with gr.TabItem('Normal map'):
-             create_demo_normal(model.process_normal, max_images=MAX_IMAGES)
+             create_demo_normal(model.process_normal,
+                                max_images=MAX_IMAGES,
+                                default_num_images=DEFAULT_NUM_IMAGES)

      with gr.Accordion(label='Base model', open=False):
-         current_base_model = gr.Text(label='Current base model',
-                                      value=DEFAULT_BASE_MODEL_URL)
          with gr.Row():
-             base_model_repo = gr.Text(label='Base model repo',
-                                       max_lines=1,
-                                       placeholder=DEFAULT_BASE_MODEL_REPO,
-                                       interactive=ALLOW_CHANGING_BASE_MODEL)
-             base_model_filename = gr.Text(
-                 label='Base model file',
-                 max_lines=1,
-                 placeholder=DEFAULT_BASE_MODEL_FILENAME,
-                 interactive=ALLOW_CHANGING_BASE_MODEL)
-         change_base_model_button = gr.Button('Change base model')
-         gr.Markdown(
-             '''- You can use other base models by specifying the repository name and filename.
-             The base model must be compatible with Stable Diffusion v1.5.''')
-
+             with gr.Column():
+                 current_base_model = gr.Text(label='Current base model')
+             with gr.Column(scale=0.3):
+                 check_base_model_button = gr.Button('Check current base model')
+         with gr.Row():
+             with gr.Column():
+                 new_base_model_id = gr.Text(
+                     label='New base model',
+                     max_lines=1,
+                     placeholder='runwayml/stable-diffusion-v1-5',
+                     info=
+                     'The base model must be compatible with Stable Diffusion v1.5.',
+                     interactive=ALLOW_CHANGING_BASE_MODEL)
+             with gr.Column(scale=0.3):
+                 change_base_model_button = gr.Button('Change base model')
+         if not ALLOW_CHANGING_BASE_MODEL:
+             gr.Markdown(
+                 '''The base model is not allowed to be changed in this Space so as not to slow down the demo, but it can be changed if you duplicate the Space. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'''
+             )
+
+     gr.Markdown(
+         '[Space using Anything-v4.0 as base model](https://huggingface.co/spaces/hysts/ControlNet-with-other-models)'
+     )
+
+     check_base_model_button.click(fn=lambda: model.base_model_id,
+                                   outputs=current_base_model,
+                                   queue=False)
+     new_base_model_id.submit(fn=model.set_base_model,
+                              inputs=new_base_model_id,
+                              outputs=current_base_model)
      change_base_model_button.click(fn=model.set_base_model,
-                                    inputs=[
-                                        base_model_repo,
-                                        base_model_filename,
-                                    ],
+                                    inputs=new_base_model_id,
                                     outputs=current_base_model)

  demo.queue(api_open=False).launch()
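
Note: the refactored app.py above takes its runtime settings from environment variables instead of hard-coded constants. The sketch below is a minimal illustration of that clamping and fallback logic, using only the variable names and defaults that appear in the diff; the override values are hypothetical, as they would be set in a duplicated Space's settings.

import os

# Hypothetical overrides, e.g. configured in a duplicated Space's settings.
os.environ['MAX_IMAGES'] = '2'
os.environ['DEFAULT_NUM_IMAGES'] = '5'

# Same logic as the configuration block added to app.py in this commit.
MAX_IMAGES = int(os.getenv('MAX_IMAGES', '3'))
DEFAULT_NUM_IMAGES = min(MAX_IMAGES, int(os.getenv('DEFAULT_NUM_IMAGES', '1')))
DEFAULT_MODEL_ID = os.getenv('DEFAULT_MODEL_ID',
                             'runwayml/stable-diffusion-v1-5')

print(MAX_IMAGES)          # 2
print(DEFAULT_NUM_IMAGES)  # requested 5, clamped down to MAX_IMAGES = 2
print(DEFAULT_MODEL_ID)    # runwayml/stable-diffusion-v1-5 (fallback when unset)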
gradio_canny2image.py β†’ app_canny.py RENAMED
@@ -3,7 +3,7 @@
  import gradio as gr


- def create_demo(process, max_images=12):
+ def create_demo(process, max_images=12, default_num_images=3):
      with gr.Blocks() as demo:
          with gr.Row():
              gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
@@ -16,39 +16,40 @@ def create_demo(process, max_images=12):
                      num_samples = gr.Slider(label='Images',
                                              minimum=1,
                                              maximum=max_images,
-                                             value=1,
+                                             value=default_num_images,
                                              step=1)
                      image_resolution = gr.Slider(label='Image Resolution',
                                                   minimum=256,
                                                   maximum=768,
                                                   value=512,
                                                   step=256)
-                     low_threshold = gr.Slider(label='Canny low threshold',
-                                               minimum=1,
-                                               maximum=255,
-                                               value=100,
-                                               step=1)
-                     high_threshold = gr.Slider(label='Canny high threshold',
-                                                minimum=1,
-                                                maximum=255,
-                                                value=200,
-                                                step=1)
-                     ddim_steps = gr.Slider(label='Steps',
-                                            minimum=1,
-                                            maximum=100,
-                                            value=20,
-                                            step=1)
-                     scale = gr.Slider(label='Guidance Scale',
-                                       minimum=0.1,
-                                       maximum=30.0,
-                                       value=9.0,
-                                       step=0.1)
+                     canny_low_threshold = gr.Slider(
+                         label='Canny low threshold',
+                         minimum=1,
+                         maximum=255,
+                         value=100,
+                         step=1)
+                     canny_high_threshold = gr.Slider(
+                         label='Canny high threshold',
+                         minimum=1,
+                         maximum=255,
+                         value=200,
+                         step=1)
+                     num_steps = gr.Slider(label='Steps',
+                                           minimum=1,
+                                           maximum=100,
+                                           value=20,
+                                           step=1)
+                     guidance_scale = gr.Slider(label='Guidance Scale',
+                                                minimum=0.1,
+                                                maximum=30.0,
+                                                value=9.0,
+                                                step=0.1)
                      seed = gr.Slider(label='Seed',
                                       minimum=-1,
                                       maximum=2147483647,
                                       step=1,
                                       randomize=True)
-                     eta = gr.Number(label='eta (DDIM)', value=0.0)
                      a_prompt = gr.Textbox(
                          label='Added Prompt',
                          value='best quality, extremely detailed')
@@ -58,17 +59,33 @@ def create_demo(process, max_images=12):
                          'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                      )
              with gr.Column():
-                 result_gallery = gr.Gallery(label='Output',
-                                             show_label=False,
-                                             elem_id='gallery').style(
-                                                 grid=2, height='auto')
-         ips = [
-             input_image, prompt, a_prompt, n_prompt, num_samples,
-             image_resolution, ddim_steps, scale, seed, eta, low_threshold,
-             high_threshold
+                 result = gr.Gallery(label='Output',
+                                     show_label=False,
+                                     elem_id='gallery').style(grid=2,
+                                                              height='auto')
+         inputs = [
+             input_image,
+             prompt,
+             a_prompt,
+             n_prompt,
+             num_samples,
+             image_resolution,
+             num_steps,
+             guidance_scale,
+             seed,
+             canny_low_threshold,
+             canny_high_threshold,
          ]
+         prompt.submit(fn=process, inputs=inputs, outputs=result)
          run_button.click(fn=process,
-                          inputs=ips,
-                          outputs=[result_gallery],
+                          inputs=inputs,
+                          outputs=result,
                           api_name='canny')
      return demo
+
+
+ if __name__ == '__main__':
+     from model import Model
+     model = Model()
+     demo = create_demo(model.process_canny)
+     demo.queue().launch()
gradio_depth2image.py β†’ app_depth.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Depth Maps')
@@ -13,10 +13,12 @@ def create_demo(process, max_images=12):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -28,22 +30,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=384,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
46
- eta = gr.Number(label='eta (DDIM)', value=0.0)
47
  a_prompt = gr.Textbox(
48
  label='Added Prompt',
49
  value='best quality, extremely detailed')
@@ -53,16 +54,33 @@ def create_demo(process, max_images=12):
53
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
54
  )
55
  with gr.Column():
56
- result_gallery = gr.Gallery(label='Output',
57
- show_label=False,
58
- elem_id='gallery').style(
59
- grid=2, height='auto')
60
- ips = [
61
- input_image, prompt, a_prompt, n_prompt, num_samples,
62
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
63
  ]
 
64
  run_button.click(fn=process,
65
- inputs=ips,
66
- outputs=[result_gallery],
67
  api_name='depth')
68
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Depth Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
+ is_depth_image = gr.Checkbox(label='Is depth image',
17
+ value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
+ value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
 
30
  maximum=1024,
31
  value=384,
32
  step=1)
33
+ num_steps = gr.Slider(label='Steps',
34
+ minimum=1,
35
+ maximum=100,
36
+ value=20,
37
+ step=1)
38
+ guidance_scale = gr.Slider(label='Guidance Scale',
39
+ minimum=0.1,
40
+ maximum=30.0,
41
+ value=9.0,
42
+ step=0.1)
43
  seed = gr.Slider(label='Seed',
44
  minimum=-1,
45
  maximum=2147483647,
46
  step=1,
47
  randomize=True)
 
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
 
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
+ result = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(grid=2,
60
+ height='auto')
61
+ inputs = [
62
+ input_image,
63
+ prompt,
64
+ a_prompt,
65
+ n_prompt,
66
+ num_samples,
67
+ image_resolution,
68
+ detect_resolution,
69
+ num_steps,
70
+ guidance_scale,
71
+ seed,
72
+ is_depth_image,
73
  ]
74
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
75
  run_button.click(fn=process,
76
+ inputs=inputs,
77
+ outputs=result,
78
  api_name='depth')
79
  return demo
80
+
81
+
82
+ if __name__ == '__main__':
83
+ from model import Model
84
+ model = Model()
85
+ demo = create_demo(model.process_depth)
86
+ demo.queue().launch()
gradio_fake_scribble2image.py β†’ app_fake_scribble.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
@@ -16,7 +16,7 @@ def create_demo(process, max_images=12):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -28,22 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
46
- eta = gr.Number(label='eta (DDIM)', value=0.0)
47
  a_prompt = gr.Textbox(
48
  label='Added Prompt',
49
  value='best quality, extremely detailed')
@@ -53,16 +52,32 @@ def create_demo(process, max_images=12):
53
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
54
  )
55
  with gr.Column():
56
- result_gallery = gr.Gallery(label='Output',
57
- show_label=False,
58
- elem_id='gallery').style(
59
- grid=2, height='auto')
60
- ips = [
61
- input_image, prompt, a_prompt, n_prompt, num_samples,
62
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
63
  ]
 
64
  run_button.click(fn=process,
65
- inputs=ips,
66
- outputs=[result_gallery],
67
  api_name='fake_scribble')
68
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
72
  run_button.click(fn=process,
73
+ inputs=inputs,
74
+ outputs=result,
75
  api_name='fake_scribble')
76
  return demo
77
+
78
+
79
+ if __name__ == '__main__':
80
+ from model import Model
81
+ model = Model()
82
+ demo = create_demo(model.process_fake_scribble)
83
+ demo.queue().launch()
gradio_hed2image.py β†’ app_hed.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with HED Maps')
@@ -16,7 +16,7 @@ def create_demo(process, max_images=12):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -28,22 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
46
- eta = gr.Number(label='eta (DDIM)', value=0.0)
47
  a_prompt = gr.Textbox(
48
  label='Added Prompt',
49
  value='best quality, extremely detailed')
@@ -53,16 +52,32 @@ def create_demo(process, max_images=12):
53
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
54
  )
55
  with gr.Column():
56
- result_gallery = gr.Gallery(label='Output',
57
- show_label=False,
58
- elem_id='gallery').style(
59
- grid=2, height='auto')
60
- ips = [
61
- input_image, prompt, a_prompt, n_prompt, num_samples,
62
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
63
  ]
 
64
  run_button.click(fn=process,
65
- inputs=ips,
66
- outputs=[result_gallery],
67
  api_name='hed')
68
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with HED Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
72
  run_button.click(fn=process,
73
+ inputs=inputs,
74
+ outputs=result,
75
  api_name='hed')
76
  return demo
77
+
78
+
79
+ if __name__ == '__main__':
80
+ from model import Model
81
+ model = Model()
82
+ demo = create_demo(model.process_hed)
83
+ demo.queue().launch()
gradio_hough2image.py β†’ app_hough.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
@@ -16,7 +16,7 @@ def create_demo(process, max_images=12):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -28,34 +28,33 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
- distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
- ddim_steps = gr.Slider(label='Steps',
44
- minimum=1,
45
- maximum=100,
46
- value=20,
47
- step=1)
48
- scale = gr.Slider(label='Guidance Scale',
49
- minimum=0.1,
50
- maximum=30.0,
51
- value=9.0,
52
- step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
  randomize=True)
58
- eta = gr.Number(label='eta (DDIM)', value=0.0)
59
  a_prompt = gr.Textbox(
60
  label='Added Prompt',
61
  value='best quality, extremely detailed')
@@ -65,17 +64,34 @@ def create_demo(process, max_images=12):
65
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
66
  )
67
  with gr.Column():
68
- result_gallery = gr.Gallery(label='Output',
69
- show_label=False,
70
- elem_id='gallery').style(
71
- grid=2, height='auto')
72
- ips = [
73
- input_image, prompt, a_prompt, n_prompt, num_samples,
74
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
75
- value_threshold, distance_threshold
 
 
 
 
 
 
 
 
 
76
  ]
 
77
  run_button.click(fn=process,
78
- inputs=ips,
79
- outputs=[result_gallery],
80
  api_name='hough')
81
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ mlsd_value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
+ mlsd_distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
+ num_steps = gr.Slider(label='Steps',
44
+ minimum=1,
45
+ maximum=100,
46
+ value=20,
47
+ step=1)
48
+ guidance_scale = gr.Slider(label='Guidance Scale',
49
+ minimum=0.1,
50
+ maximum=30.0,
51
+ value=9.0,
52
+ step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
  randomize=True)
 
58
  a_prompt = gr.Textbox(
59
  label='Added Prompt',
60
  value='best quality, extremely detailed')
 
64
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
65
  )
66
  with gr.Column():
67
+ result = gr.Gallery(label='Output',
68
+ show_label=False,
69
+ elem_id='gallery').style(grid=2,
70
+ height='auto')
71
+ inputs = [
72
+ input_image,
73
+ prompt,
74
+ a_prompt,
75
+ n_prompt,
76
+ num_samples,
77
+ image_resolution,
78
+ detect_resolution,
79
+ num_steps,
80
+ guidance_scale,
81
+ seed,
82
+ mlsd_value_threshold,
83
+ mlsd_distance_threshold,
84
  ]
85
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
86
  run_button.click(fn=process,
87
+ inputs=inputs,
88
+ outputs=result,
89
  api_name='hough')
90
  return demo
91
+
92
+
93
+ if __name__ == '__main__':
94
+ from model import Model
95
+ model = Model()
96
+ demo = create_demo(model.process_hough)
97
+ demo.queue().launch()
gradio_normal2image.py β†’ app_normal.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Normal Maps')
@@ -13,10 +13,12 @@ def create_demo(process, max_images=12):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -34,22 +36,21 @@ def create_demo(process, max_images=12):
34
  maximum=1.0,
35
  value=0.4,
36
  step=0.01)
37
- ddim_steps = gr.Slider(label='Steps',
38
- minimum=1,
39
- maximum=100,
40
- value=20,
41
- step=1)
42
- scale = gr.Slider(label='Guidance Scale',
43
- minimum=0.1,
44
- maximum=30.0,
45
- value=9.0,
46
- step=0.1)
47
  seed = gr.Slider(label='Seed',
48
  minimum=-1,
49
  maximum=2147483647,
50
  step=1,
51
  randomize=True)
52
- eta = gr.Number(label='eta (DDIM)', value=0.0)
53
  a_prompt = gr.Textbox(
54
  label='Added Prompt',
55
  value='best quality, extremely detailed')
@@ -59,17 +60,34 @@ def create_demo(process, max_images=12):
59
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
  )
61
  with gr.Column():
62
- result_gallery = gr.Gallery(label='Output',
63
- show_label=False,
64
- elem_id='gallery').style(
65
- grid=2, height='auto')
66
- ips = [
67
- input_image, prompt, a_prompt, n_prompt, num_samples,
68
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
69
- bg_threshold
 
 
 
 
 
 
 
 
 
70
  ]
 
71
  run_button.click(fn=process,
72
- inputs=ips,
73
- outputs=[result_gallery],
74
  api_name='normal')
75
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Normal Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
+ is_normal_image = gr.Checkbox(label='Is normal image',
17
+ value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
+ value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
 
36
  maximum=1.0,
37
  value=0.4,
38
  step=0.01)
39
+ num_steps = gr.Slider(label='Steps',
40
+ minimum=1,
41
+ maximum=100,
42
+ value=20,
43
+ step=1)
44
+ guidance_scale = gr.Slider(label='Guidance Scale',
45
+ minimum=0.1,
46
+ maximum=30.0,
47
+ value=9.0,
48
+ step=0.1)
49
  seed = gr.Slider(label='Seed',
50
  minimum=-1,
51
  maximum=2147483647,
52
  step=1,
53
  randomize=True)
 
54
  a_prompt = gr.Textbox(
55
  label='Added Prompt',
56
  value='best quality, extremely detailed')
 
60
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
61
  )
62
  with gr.Column():
63
+ result = gr.Gallery(label='Output',
64
+ show_label=False,
65
+ elem_id='gallery').style(grid=2,
66
+ height='auto')
67
+ inputs = [
68
+ input_image,
69
+ prompt,
70
+ a_prompt,
71
+ n_prompt,
72
+ num_samples,
73
+ image_resolution,
74
+ detect_resolution,
75
+ num_steps,
76
+ guidance_scale,
77
+ seed,
78
+ bg_threshold,
79
+ is_normal_image,
80
  ]
81
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
82
  run_button.click(fn=process,
83
+ inputs=inputs,
84
+ outputs=result,
85
  api_name='normal')
86
  return demo
87
+
88
+
89
+ if __name__ == '__main__':
90
+ from model import Model
91
+ model = Model()
92
+ demo = create_demo(model.process_normal)
93
+ demo.queue().launch()
gradio_pose2image.py β†’ app_pose.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Human Pose')
@@ -13,10 +13,15 @@ def create_demo(process, max_images=12):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
 
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -28,22 +33,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
  randomize=True)
46
- eta = gr.Number(label='eta (DDIM)', value=0.0)
47
  a_prompt = gr.Textbox(
48
  label='Added Prompt',
49
  value='best quality, extremely detailed')
@@ -53,16 +57,33 @@ def create_demo(process, max_images=12):
53
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
54
  )
55
  with gr.Column():
56
- result_gallery = gr.Gallery(label='Output',
57
- show_label=False,
58
- elem_id='gallery').style(
59
- grid=2, height='auto')
60
- ips = [
61
- input_image, prompt, a_prompt, n_prompt, num_samples,
62
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
63
  ]
 
64
  run_button.click(fn=process,
65
- inputs=ips,
66
- outputs=[result_gallery],
67
  api_name='pose')
68
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Human Pose')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
+ is_pose_image = gr.Checkbox(label='Is pose image',
17
+ value=False)
18
+ gr.Markdown(
19
+ 'You can use [PoseMaker2](https://huggingface.co/spaces/jonigata/PoseMaker2) to create pose images.'
20
+ )
21
  num_samples = gr.Slider(label='Images',
22
  minimum=1,
23
  maximum=max_images,
24
+ value=default_num_images,
25
  step=1)
26
  image_resolution = gr.Slider(label='Image Resolution',
27
  minimum=256,
 
33
  maximum=1024,
34
  value=512,
35
  step=1)
36
+ num_steps = gr.Slider(label='Steps',
37
+ minimum=1,
38
+ maximum=100,
39
+ value=20,
40
+ step=1)
41
+ guidance_scale = gr.Slider(label='Guidance Scale',
42
+ minimum=0.1,
43
+ maximum=30.0,
44
+ value=9.0,
45
+ step=0.1)
46
  seed = gr.Slider(label='Seed',
47
  minimum=-1,
48
  maximum=2147483647,
49
  step=1,
50
  randomize=True)
 
51
  a_prompt = gr.Textbox(
52
  label='Added Prompt',
53
  value='best quality, extremely detailed')
 
57
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
58
  )
59
  with gr.Column():
60
+ result = gr.Gallery(label='Output',
61
+ show_label=False,
62
+ elem_id='gallery').style(grid=2,
63
+ height='auto')
64
+ inputs = [
65
+ input_image,
66
+ prompt,
67
+ a_prompt,
68
+ n_prompt,
69
+ num_samples,
70
+ image_resolution,
71
+ detect_resolution,
72
+ num_steps,
73
+ guidance_scale,
74
+ seed,
75
+ is_pose_image,
76
  ]
77
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
78
  run_button.click(fn=process,
79
+ inputs=inputs,
80
+ outputs=result,
81
  api_name='pose')
82
  return demo
83
+
84
+
85
+ if __name__ == '__main__':
86
+ from model import Model
87
+ model = Model()
88
+ demo = create_demo(model.process_pose)
89
+ demo.queue().launch()
gradio_scribble2image.py β†’ app_scribble.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Scribble Maps')
@@ -16,29 +16,28 @@ def create_demo(process, max_images=12):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
  maximum=768,
24
  value=512,
25
  step=256)
26
- ddim_steps = gr.Slider(label='Steps',
27
- minimum=1,
28
- maximum=100,
29
- value=20,
30
- step=1)
31
- scale = gr.Slider(label='Guidance Scale',
32
- minimum=0.1,
33
- maximum=30.0,
34
- value=9.0,
35
- step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
  randomize=True)
41
- eta = gr.Number(label='eta (DDIM)', value=0.0)
42
  a_prompt = gr.Textbox(
43
  label='Added Prompt',
44
  value='best quality, extremely detailed')
@@ -48,16 +47,31 @@ def create_demo(process, max_images=12):
48
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
49
  )
50
  with gr.Column():
51
- result_gallery = gr.Gallery(label='Output',
52
- show_label=False,
53
- elem_id='gallery').style(
54
- grid=2, height='auto')
55
- ips = [
56
- input_image, prompt, a_prompt, n_prompt, num_samples,
57
- image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
58
  ]
 
59
  run_button.click(fn=process,
60
- inputs=ips,
61
- outputs=[result_gallery],
62
  api_name='scribble')
63
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Scribble Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
  maximum=768,
24
  value=512,
25
  step=256)
26
+ num_steps = gr.Slider(label='Steps',
27
+ minimum=1,
28
+ maximum=100,
29
+ value=20,
30
+ step=1)
31
+ guidance_scale = gr.Slider(label='Guidance Scale',
32
+ minimum=0.1,
33
+ maximum=30.0,
34
+ value=9.0,
35
+ step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
  randomize=True)
 
41
  a_prompt = gr.Textbox(
42
  label='Added Prompt',
43
  value='best quality, extremely detailed')
 
47
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
48
  )
49
  with gr.Column():
50
+ result = gr.Gallery(label='Output',
51
+ show_label=False,
52
+ elem_id='gallery').style(grid=2,
53
+ height='auto')
54
+ inputs = [
55
+ input_image,
56
+ prompt,
57
+ a_prompt,
58
+ n_prompt,
59
+ num_samples,
60
+ image_resolution,
61
+ num_steps,
62
+ guidance_scale,
63
+ seed,
64
  ]
65
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
66
  run_button.click(fn=process,
67
+ inputs=inputs,
68
+ outputs=result,
69
  api_name='scribble')
70
  return demo
71
+
72
+
73
+ if __name__ == '__main__':
74
+ from model import Model
75
+ model = Model()
76
+ demo = create_demo(model.process_scribble)
77
+ demo.queue().launch()
gradio_scribble2image_interactive.py β†’ app_scribble_interactive.py RENAMED
@@ -8,7 +8,7 @@ def create_canvas(w, h):
8
  return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
9
 
10
 
11
- def create_demo(process, max_images=12):
12
  with gr.Blocks() as demo:
13
  with gr.Row():
14
  gr.Markdown(
@@ -37,7 +37,7 @@ def create_demo(process, max_images=12):
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
- outputs=[input_image],
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
@@ -45,29 +45,28 @@ def create_demo(process, max_images=12):
45
  num_samples = gr.Slider(label='Images',
46
  minimum=1,
47
  maximum=max_images,
48
- value=1,
49
  step=1)
50
  image_resolution = gr.Slider(label='Image Resolution',
51
  minimum=256,
52
  maximum=768,
53
  value=512,
54
  step=256)
55
- ddim_steps = gr.Slider(label='Steps',
56
- minimum=1,
57
- maximum=100,
58
- value=20,
59
- step=1)
60
- scale = gr.Slider(label='Guidance Scale',
61
- minimum=0.1,
62
- maximum=30.0,
63
- value=9.0,
64
- step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
  randomize=True)
70
- eta = gr.Number(label='eta (DDIM)', value=0.0)
71
  a_prompt = gr.Textbox(
72
  label='Added Prompt',
73
  value='best quality, extremely detailed')
@@ -77,13 +76,28 @@ def create_demo(process, max_images=12):
77
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
78
  )
79
  with gr.Column():
80
- result_gallery = gr.Gallery(label='Output',
81
- show_label=False,
82
- elem_id='gallery').style(
83
- grid=2, height='auto')
84
- ips = [
85
- input_image, prompt, a_prompt, n_prompt, num_samples,
86
- image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
87
  ]
88
- run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
 
89
  return demo
 
 
 
 
 
 
 
 
8
  return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
9
 
10
 
11
+ def create_demo(process, max_images=12, default_num_images=3):
12
  with gr.Blocks() as demo:
13
  with gr.Row():
14
  gr.Markdown(
 
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
+ outputs=input_image,
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
 
45
  num_samples = gr.Slider(label='Images',
46
  minimum=1,
47
  maximum=max_images,
48
+ value=default_num_images,
49
  step=1)
50
  image_resolution = gr.Slider(label='Image Resolution',
51
  minimum=256,
52
  maximum=768,
53
  value=512,
54
  step=256)
55
+ num_steps = gr.Slider(label='Steps',
56
+ minimum=1,
57
+ maximum=100,
58
+ value=20,
59
+ step=1)
60
+ guidance_scale = gr.Slider(label='Guidance Scale',
61
+ minimum=0.1,
62
+ maximum=30.0,
63
+ value=9.0,
64
+ step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
  randomize=True)
 
70
  a_prompt = gr.Textbox(
71
  label='Added Prompt',
72
  value='best quality, extremely detailed')
 
76
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
77
  )
78
  with gr.Column():
79
+ result = gr.Gallery(label='Output',
80
+ show_label=False,
81
+ elem_id='gallery').style(grid=2,
82
+ height='auto')
83
+ inputs = [
84
+ input_image,
85
+ prompt,
86
+ a_prompt,
87
+ n_prompt,
88
+ num_samples,
89
+ image_resolution,
90
+ num_steps,
91
+ guidance_scale,
92
+ seed,
93
  ]
94
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
95
+ run_button.click(fn=process, inputs=inputs, outputs=result)
96
  return demo
97
+
98
+
99
+ if __name__ == '__main__':
100
+ from model import Model
101
+ model = Model()
102
+ demo = create_demo(model.process_scribble_interactive)
103
+ demo.queue().launch()
gradio_seg2image.py β†’ app_seg.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
@@ -13,10 +13,12 @@ def create_demo(process, max_images=12):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
@@ -29,22 +31,21 @@ def create_demo(process, max_images=12):
29
  maximum=1024,
30
  value=512,
31
  step=1)
32
- ddim_steps = gr.Slider(label='Steps',
33
- minimum=1,
34
- maximum=100,
35
- value=20,
36
- step=1)
37
- scale = gr.Slider(label='Guidance Scale',
38
- minimum=0.1,
39
- maximum=30.0,
40
- value=9.0,
41
- step=0.1)
42
  seed = gr.Slider(label='Seed',
43
  minimum=-1,
44
  maximum=2147483647,
45
  step=1,
46
  randomize=True)
47
- eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,16 +55,33 @@ def create_demo(process, max_images=12):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result_gallery = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(
60
- grid=2, height='auto')
61
- ips = [
62
- input_image, prompt, a_prompt, n_prompt, num_samples,
63
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
64
  ]
 
65
  run_button.click(fn=process,
66
- inputs=ips,
67
- outputs=[result_gallery],
68
  api_name='seg')
69
  return demo
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
+ is_segmentation_map = gr.Checkbox(
17
+ label='Is segmentation map', value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
+ value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
 
31
  maximum=1024,
32
  value=512,
33
  step=1)
34
+ num_steps = gr.Slider(label='Steps',
35
+ minimum=1,
36
+ maximum=100,
37
+ value=20,
38
+ step=1)
39
+ guidance_scale = gr.Slider(label='Guidance Scale',
40
+ minimum=0.1,
41
+ maximum=30.0,
42
+ value=9.0,
43
+ step=0.1)
44
  seed = gr.Slider(label='Seed',
45
  minimum=-1,
46
  maximum=2147483647,
47
  step=1,
48
  randomize=True)
 
49
  a_prompt = gr.Textbox(
50
  label='Added Prompt',
51
  value='best quality, extremely detailed')
 
55
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
56
  )
57
  with gr.Column():
58
+ result = gr.Gallery(label='Output',
59
+ show_label=False,
60
+ elem_id='gallery').style(grid=2,
61
+ height='auto')
62
+ inputs = [
63
+ input_image,
64
+ prompt,
65
+ a_prompt,
66
+ n_prompt,
67
+ num_samples,
68
+ image_resolution,
69
+ detect_resolution,
70
+ num_steps,
71
+ guidance_scale,
72
+ seed,
73
+ is_segmentation_map,
74
  ]
75
+ prompt.submit(fn=process, inputs=inputs, outputs=result)
76
  run_button.click(fn=process,
77
+ inputs=inputs,
78
+ outputs=result,
79
  api_name='seg')
80
  return demo
81
+
82
+
83
+ if __name__ == '__main__':
84
+ from model import Model
85
+ model = Model()
86
+ demo = create_demo(model.process_seg)
87
+ demo.queue().launch()
model.py CHANGED
@@ -3,21 +3,20 @@
3
  from __future__ import annotations
4
 
5
  import pathlib
6
- import random
7
- import shlex
8
- import subprocess
9
  import sys
10
 
11
  import cv2
12
- import einops
13
  import numpy as np
 
14
  import torch
15
- from huggingface_hub import hf_hub_url
16
- from pytorch_lightning import seed_everything
 
17
 
18
- sys.path.append('ControlNet')
 
 
19
 
20
- import config
21
  from annotator.canny import apply_canny
22
  from annotator.hed import apply_hed, nms
23
  from annotator.midas import apply_midas
@@ -25,733 +24,600 @@ from annotator.mlsd import apply_mlsd
25
  from annotator.openpose import apply_openpose
26
  from annotator.uniformer import apply_uniformer
27
  from annotator.util import HWC3, resize_image
28
- from cldm.model import create_model, load_state_dict
29
- from ldm.models.diffusion.ddim import DDIMSampler
30
  from share import *
31
 
32
- MODEL_NAMES = {
33
- 'canny': 'control_canny-fp16.safetensors',
34
- 'hough': 'control_mlsd-fp16.safetensors',
35
- 'hed': 'control_hed-fp16.safetensors',
36
- 'scribble': 'control_scribble-fp16.safetensors',
37
- 'pose': 'control_openpose-fp16.safetensors',
38
- 'seg': 'control_seg-fp16.safetensors',
39
- 'depth': 'control_depth-fp16.safetensors',
40
- 'normal': 'control_normal-fp16.safetensors',
41
  }
42
- MODEL_REPO = 'webui/ControlNet-modules-safetensors'
43
 
44
- DEFAULT_BASE_MODEL_REPO = 'andite/anything-v4.0'
45
- DEFAULT_BASE_MODEL_FILENAME = 'anything-v4.0-pruned.safetensors'
46
- DEFAULT_BASE_MODEL_URL = 'https://huggingface.co/andite/anything-v4.0/resolve/main/anything-v4.0-pruned.safetensors'
 
47
 
48
 
49
  class Model:
50
  def __init__(self,
51
- model_config_path: str = 'ControlNet/models/cldm_v15.yaml',
52
- model_dir: str = 'models'):
53
- self.device = torch.device(
54
- 'cuda:0' if torch.cuda.is_available() else 'cpu')
55
- self.model = create_model(model_config_path).to(self.device)
56
- self.ddim_sampler = DDIMSampler(self.model)
57
  self.task_name = ''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- self.base_model_url = ''
60
- self.model_dir = pathlib.Path(model_dir)
61
- self.model_dir.mkdir(exist_ok=True, parents=True)
62
-
63
- self.download_models()
64
- self.set_base_model(DEFAULT_BASE_MODEL_REPO,
65
- DEFAULT_BASE_MODEL_FILENAME)
66
-
67
- def set_base_model(self, model_id: str, filename: str) -> str:
68
- if not model_id or not filename:
69
- return self.base_model_url
70
- base_model_url = hf_hub_url(model_id, filename)
71
- if base_model_url != self.base_model_url:
72
- self.load_base_model(base_model_url)
73
- self.base_model_url = base_model_url
74
- return self.base_model_url
75
-
76
- def download_base_model(self, model_url: str) -> pathlib.Path:
77
- self.model_dir.mkdir(exist_ok=True, parents=True)
78
- model_name = model_url.split('/')[-1]
79
- out_path = self.model_dir / model_name
80
- if not out_path.exists():
81
- subprocess.run(shlex.split(f'wget {model_url} -O {out_path}'))
82
- return out_path
83
-
84
- def load_base_model(self, model_url: str) -> None:
85
- model_path = self.download_base_model(model_url)
86
- self.model.load_state_dict(load_state_dict(model_path,
87
- location=self.device.type),
88
- strict=False)
89
-
90
- def load_weight(self, task_name: str) -> None:
91
  if task_name == self.task_name:
92
  return
93
- weight_path = self.get_weight_path(task_name)
94
- self.model.control_model.load_state_dict(
95
- load_state_dict(weight_path, location=self.device.type))
 
 
 
96
  self.task_name = task_name
97
 
98
- def get_weight_path(self, task_name: str) -> str:
99
- if 'scribble' in task_name:
100
- task_name = 'scribble'
101
- return f'{self.model_dir}/{MODEL_NAMES[task_name]}'
102
-
103
- def download_models(self) -> None:
104
- self.model_dir.mkdir(exist_ok=True, parents=True)
105
- for name in MODEL_NAMES.values():
106
- out_path = self.model_dir / name
107
- if out_path.exists():
108
- continue
109
- model_url = hf_hub_url(MODEL_REPO, name)
110
- subprocess.run(shlex.split(f'wget {model_url} -O {out_path}'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  @torch.inference_mode()
113
- def process_canny(self, input_image, prompt, a_prompt, n_prompt,
114
- num_samples, image_resolution, ddim_steps, scale, seed,
115
- eta, low_threshold, high_threshold):
116
- self.load_weight('canny')
117
-
118
- img = resize_image(HWC3(input_image), image_resolution)
119
- H, W, C = img.shape
120
-
121
- detected_map = apply_canny(img, low_threshold, high_threshold)
122
- detected_map = HWC3(detected_map)
123
-
124
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
125
- control = torch.stack([control for _ in range(num_samples)], dim=0)
126
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
127
-
128
- if seed == -1:
129
- seed = random.randint(0, 65535)
130
- seed_everything(seed)
131
-
132
- if config.save_memory:
133
- self.model.low_vram_shift(is_diffusing=False)
134
-
135
- cond = {
136
- 'c_concat': [control],
137
- 'c_crossattn': [
138
- self.model.get_learned_conditioning(
139
- [prompt + ', ' + a_prompt] * num_samples)
140
- ]
141
- }
142
- un_cond = {
143
- 'c_concat': [control],
144
- 'c_crossattn':
145
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
146
- }
147
- shape = (4, H // 8, W // 8)
148
-
149
- if config.save_memory:
150
- self.model.low_vram_shift(is_diffusing=True)
151
-
152
- samples, intermediates = self.ddim_sampler.sample(
153
- ddim_steps,
154
- num_samples,
155
- shape,
156
- cond,
157
- verbose=False,
158
- eta=eta,
159
- unconditional_guidance_scale=scale,
160
- unconditional_conditioning=un_cond)
161
-
162
- if config.save_memory:
163
- self.model.low_vram_shift(is_diffusing=False)
164
-
165
- x_samples = self.model.decode_first_stage(samples)
166
- x_samples = (
167
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
168
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
169
-
170
- results = [x_samples[i] for i in range(num_samples)]
171
- return [255 - detected_map] + results
172
 
173
- @torch.inference_mode()
174
- def process_hough(self, input_image, prompt, a_prompt, n_prompt,
175
- num_samples, image_resolution, detect_resolution,
176
- ddim_steps, scale, seed, eta, value_threshold,
177
- distance_threshold):
178
- self.load_weight('hough')
179
 
180
- input_image = HWC3(input_image)
181
- detected_map = apply_mlsd(resize_image(input_image, detect_resolution),
182
- value_threshold, distance_threshold)
183
- detected_map = HWC3(detected_map)
184
- img = resize_image(input_image, image_resolution)
185
- H, W, C = img.shape
186
-
187
- detected_map = cv2.resize(detected_map, (W, H),
188
- interpolation=cv2.INTER_NEAREST)
189
-
190
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
191
- control = torch.stack([control for _ in range(num_samples)], dim=0)
192
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
193
-
194
- if seed == -1:
195
- seed = random.randint(0, 65535)
196
- seed_everything(seed)
197
-
198
- if config.save_memory:
199
- self.model.low_vram_shift(is_diffusing=False)
200
-
201
- cond = {
202
- 'c_concat': [control],
203
- 'c_crossattn': [
204
- self.model.get_learned_conditioning(
205
- [prompt + ', ' + a_prompt] * num_samples)
206
- ]
207
- }
208
- un_cond = {
209
- 'c_concat': [control],
210
- 'c_crossattn':
211
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
212
- }
213
- shape = (4, H // 8, W // 8)
214
-
215
- if config.save_memory:
216
- self.model.low_vram_shift(is_diffusing=True)
217
-
218
- samples, intermediates = self.ddim_sampler.sample(
219
- ddim_steps,
220
- num_samples,
221
- shape,
222
- cond,
223
- verbose=False,
224
- eta=eta,
225
- unconditional_guidance_scale=scale,
226
- unconditional_conditioning=un_cond)
227
-
228
- if config.save_memory:
229
- self.model.low_vram_shift(is_diffusing=False)
230
-
231
- x_samples = self.model.decode_first_stage(samples)
232
- x_samples = (
233
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
234
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
235
-
236
- results = [x_samples[i] for i in range(num_samples)]
237
- return [
238
- 255 - cv2.dilate(detected_map,
239
- np.ones(shape=(3, 3), dtype=np.uint8),
240
- iterations=1)
241
- ] + results
242
 
243
  @torch.inference_mode()
244
- def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples,
245
- image_resolution, detect_resolution, ddim_steps, scale,
246
- seed, eta):
247
- self.load_weight('hed')
248
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  input_image = HWC3(input_image)
250
- detected_map = apply_hed(resize_image(input_image, detect_resolution))
251
- detected_map = HWC3(detected_map)
252
- img = resize_image(input_image, image_resolution)
253
- H, W, C = img.shape
254
-
255
- detected_map = cv2.resize(detected_map, (W, H),
256
- interpolation=cv2.INTER_LINEAR)
257
-
258
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
259
- control = torch.stack([control for _ in range(num_samples)], dim=0)
260
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
261
-
262
- if seed == -1:
263
- seed = random.randint(0, 65535)
264
- seed_everything(seed)
265
-
266
- if config.save_memory:
267
- self.model.low_vram_shift(is_diffusing=False)
268
-
269
- cond = {
270
- 'c_concat': [control],
271
- 'c_crossattn': [
272
- self.model.get_learned_conditioning(
273
- [prompt + ', ' + a_prompt] * num_samples)
274
- ]
275
- }
276
- un_cond = {
277
- 'c_concat': [control],
278
- 'c_crossattn':
279
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
280
- }
281
- shape = (4, H // 8, W // 8)
282
-
283
- if config.save_memory:
284
- self.model.low_vram_shift(is_diffusing=True)
285
-
286
- samples, intermediates = self.ddim_sampler.sample(
287
- ddim_steps,
288
- num_samples,
289
- shape,
290
- cond,
291
- verbose=False,
292
- eta=eta,
293
- unconditional_guidance_scale=scale,
294
- unconditional_conditioning=un_cond)
295
-
296
- if config.save_memory:
297
- self.model.low_vram_shift(is_diffusing=False)
298
-
299
- x_samples = self.model.decode_first_stage(samples)
300
- x_samples = (
301
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
302
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
303
-
304
- results = [x_samples[i] for i in range(num_samples)]
305
- return [detected_map] + results
306
 
307
  @torch.inference_mode()
308
- def process_scribble(self, input_image, prompt, a_prompt, n_prompt,
309
- num_samples, image_resolution, ddim_steps, scale,
310
- seed, eta):
311
- self.load_weight('scribble')
312
-
313
- img = resize_image(HWC3(input_image), image_resolution)
314
- H, W, C = img.shape
315
-
316
- detected_map = np.zeros_like(img, dtype=np.uint8)
317
- detected_map[np.min(img, axis=2) < 127] = 255
318
-
319
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
320
- control = torch.stack([control for _ in range(num_samples)], dim=0)
321
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
322
-
323
- if seed == -1:
324
- seed = random.randint(0, 65535)
325
- seed_everything(seed)
326
-
327
- if config.save_memory:
328
- self.model.low_vram_shift(is_diffusing=False)
329
-
330
- cond = {
331
- 'c_concat': [control],
332
- 'c_crossattn': [
333
- self.model.get_learned_conditioning(
334
- [prompt + ', ' + a_prompt] * num_samples)
335
- ]
336
- }
337
- un_cond = {
338
- 'c_concat': [control],
339
- 'c_crossattn':
340
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
341
- }
342
- shape = (4, H // 8, W // 8)
343
-
344
- if config.save_memory:
345
- self.model.low_vram_shift(is_diffusing=True)
346
-
347
- samples, intermediates = self.ddim_sampler.sample(
348
- ddim_steps,
349
- num_samples,
350
- shape,
351
- cond,
352
- verbose=False,
353
- eta=eta,
354
- unconditional_guidance_scale=scale,
355
- unconditional_conditioning=un_cond)
356
-
357
- if config.save_memory:
358
- self.model.low_vram_shift(is_diffusing=False)
359
-
360
- x_samples = self.model.decode_first_stage(samples)
361
- x_samples = (
362
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
363
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
364
-
365
- results = [x_samples[i] for i in range(num_samples)]
366
- return [255 - detected_map] + results
367
 
368
  @torch.inference_mode()
369
- def process_scribble_interactive(self, input_image, prompt, a_prompt,
370
- n_prompt, num_samples, image_resolution,
371
- ddim_steps, scale, seed, eta):
372
- self.load_weight('scribble')
373
-
374
- img = resize_image(HWC3(input_image['mask'][:, :, 0]),
375
- image_resolution)
376
- H, W, C = img.shape
377
-
378
- detected_map = np.zeros_like(img, dtype=np.uint8)
379
- detected_map[np.min(img, axis=2) > 127] = 255
380
-
381
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
382
- control = torch.stack([control for _ in range(num_samples)], dim=0)
383
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
384
-
385
- if seed == -1:
386
- seed = random.randint(0, 65535)
387
- seed_everything(seed)
388
-
389
- if config.save_memory:
390
- self.model.low_vram_shift(is_diffusing=False)
391
-
392
- cond = {
393
- 'c_concat': [control],
394
- 'c_crossattn': [
395
- self.model.get_learned_conditioning(
396
- [prompt + ', ' + a_prompt] * num_samples)
397
- ]
398
- }
399
- un_cond = {
400
- 'c_concat': [control],
401
- 'c_crossattn':
402
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
403
- }
404
- shape = (4, H // 8, W // 8)
405
-
406
- if config.save_memory:
407
- self.model.low_vram_shift(is_diffusing=True)
408
-
409
- samples, intermediates = self.ddim_sampler.sample(
410
- ddim_steps,
411
- num_samples,
412
- shape,
413
- cond,
414
- verbose=False,
415
- eta=eta,
416
- unconditional_guidance_scale=scale,
417
- unconditional_conditioning=un_cond)
418
-
419
- if config.save_memory:
420
- self.model.low_vram_shift(is_diffusing=False)
421
-
422
- x_samples = self.model.decode_first_stage(samples)
423
- x_samples = (
424
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
425
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
426
-
427
- results = [x_samples[i] for i in range(num_samples)]
428
- return [255 - detected_map] + results
429
 
430
  @torch.inference_mode()
431
- def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt,
432
- num_samples, image_resolution, detect_resolution,
433
- ddim_steps, scale, seed, eta):
434
- self.load_weight('scribble')
435
-
 
436
  input_image = HWC3(input_image)
437
- detected_map = apply_hed(resize_image(input_image, detect_resolution))
438
- detected_map = HWC3(detected_map)
439
- img = resize_image(input_image, image_resolution)
440
- H, W, C = img.shape
441
-
442
- detected_map = cv2.resize(detected_map, (W, H),
443
- interpolation=cv2.INTER_LINEAR)
444
- detected_map = nms(detected_map, 127, 3.0)
445
- detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
446
- detected_map[detected_map > 4] = 255
447
- detected_map[detected_map < 255] = 0
448
-
449
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
450
- control = torch.stack([control for _ in range(num_samples)], dim=0)
451
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
452
-
453
- if seed == -1:
454
- seed = random.randint(0, 65535)
455
- seed_everything(seed)
456
-
457
- if config.save_memory:
458
- self.model.low_vram_shift(is_diffusing=False)
459
-
460
- cond = {
461
- 'c_concat': [control],
462
- 'c_crossattn': [
463
- self.model.get_learned_conditioning(
464
- [prompt + ', ' + a_prompt] * num_samples)
465
- ]
466
- }
467
- un_cond = {
468
- 'c_concat': [control],
469
- 'c_crossattn':
470
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
471
- }
472
- shape = (4, H // 8, W // 8)
473
-
474
- if config.save_memory:
475
- self.model.low_vram_shift(is_diffusing=True)
476
-
477
- samples, intermediates = self.ddim_sampler.sample(
478
- ddim_steps,
479
- num_samples,
480
- shape,
481
- cond,
482
- verbose=False,
483
- eta=eta,
484
- unconditional_guidance_scale=scale,
485
- unconditional_conditioning=un_cond)
486
-
487
- if config.save_memory:
488
- self.model.low_vram_shift(is_diffusing=False)
489
-
490
- x_samples = self.model.decode_first_stage(samples)
491
- x_samples = (
492
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
493
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
494
-
495
- results = [x_samples[i] for i in range(num_samples)]
496
- return [255 - detected_map] + results
497
 
498
- @torch.inference_mode()
499
- def process_pose(self, input_image, prompt, a_prompt, n_prompt,
500
- num_samples, image_resolution, detect_resolution,
501
- ddim_steps, scale, seed, eta):
502
- self.load_weight('pose')
 
503
 
504
- input_image = HWC3(input_image)
505
- detected_map, _ = apply_openpose(
506
- resize_image(input_image, detect_resolution))
507
- detected_map = HWC3(detected_map)
508
- img = resize_image(input_image, image_resolution)
509
- H, W, C = img.shape
510
-
511
- detected_map = cv2.resize(detected_map, (W, H),
512
- interpolation=cv2.INTER_NEAREST)
513
-
514
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
515
- control = torch.stack([control for _ in range(num_samples)], dim=0)
516
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
517
-
518
- if seed == -1:
519
- seed = random.randint(0, 65535)
520
- seed_everything(seed)
521
-
522
- if config.save_memory:
523
- self.model.low_vram_shift(is_diffusing=False)
524
-
525
- cond = {
526
- 'c_concat': [control],
527
- 'c_crossattn': [
528
- self.model.get_learned_conditioning(
529
- [prompt + ', ' + a_prompt] * num_samples)
530
- ]
531
- }
532
- un_cond = {
533
- 'c_concat': [control],
534
- 'c_crossattn':
535
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
536
- }
537
- shape = (4, H // 8, W // 8)
538
-
539
- if config.save_memory:
540
- self.model.low_vram_shift(is_diffusing=True)
541
-
542
- samples, intermediates = self.ddim_sampler.sample(
543
- ddim_steps,
544
- num_samples,
545
- shape,
546
- cond,
547
- verbose=False,
548
- eta=eta,
549
- unconditional_guidance_scale=scale,
550
- unconditional_conditioning=un_cond)
551
-
552
- if config.save_memory:
553
- self.model.low_vram_shift(is_diffusing=False)
554
-
555
- x_samples = self.model.decode_first_stage(samples)
556
- x_samples = (
557
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
558
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
559
-
560
- results = [x_samples[i] for i in range(num_samples)]
561
- return [detected_map] + results
562
 
563
- @torch.inference_mode()
564
- def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples,
565
- image_resolution, detect_resolution, ddim_steps, scale,
566
- seed, eta):
567
- self.load_weight('seg')
568
 
569
  input_image = HWC3(input_image)
570
- detected_map = apply_uniformer(
571
- resize_image(input_image, detect_resolution))
572
- img = resize_image(input_image, image_resolution)
573
- H, W, C = img.shape
574
-
575
- detected_map = cv2.resize(detected_map, (W, H),
576
- interpolation=cv2.INTER_NEAREST)
577
-
578
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
579
- control = torch.stack([control for _ in range(num_samples)], dim=0)
580
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
581
-
582
- if seed == -1:
583
- seed = random.randint(0, 65535)
584
- seed_everything(seed)
585
-
586
- if config.save_memory:
587
- self.model.low_vram_shift(is_diffusing=False)
588
-
589
- cond = {
590
- 'c_concat': [control],
591
- 'c_crossattn': [
592
- self.model.get_learned_conditioning(
593
- [prompt + ', ' + a_prompt] * num_samples)
594
- ]
595
- }
596
- un_cond = {
597
- 'c_concat': [control],
598
- 'c_crossattn':
599
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
600
- }
601
- shape = (4, H // 8, W // 8)
602
-
603
- if config.save_memory:
604
- self.model.low_vram_shift(is_diffusing=True)
605
-
606
- samples, intermediates = self.ddim_sampler.sample(
607
- ddim_steps,
608
- num_samples,
609
- shape,
610
- cond,
611
- verbose=False,
612
- eta=eta,
613
- unconditional_guidance_scale=scale,
614
- unconditional_conditioning=un_cond)
615
-
616
- if config.save_memory:
617
- self.model.low_vram_shift(is_diffusing=False)
618
-
619
- x_samples = self.model.decode_first_stage(samples)
620
- x_samples = (
621
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
622
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
623
-
624
- results = [x_samples[i] for i in range(num_samples)]
625
- return [detected_map] + results
626
 
627
  @torch.inference_mode()
628
- def process_depth(self, input_image, prompt, a_prompt, n_prompt,
629
- num_samples, image_resolution, detect_resolution,
630
- ddim_steps, scale, seed, eta):
631
- self.load_weight('depth')
632
-
633
  input_image = HWC3(input_image)
634
- detected_map, _ = apply_midas(
635
- resize_image(input_image, detect_resolution))
636
- detected_map = HWC3(detected_map)
637
- img = resize_image(input_image, image_resolution)
638
- H, W, C = img.shape
639
-
640
- detected_map = cv2.resize(detected_map, (W, H),
641
- interpolation=cv2.INTER_LINEAR)
642
-
643
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
644
- control = torch.stack([control for _ in range(num_samples)], dim=0)
645
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
646
-
647
- if seed == -1:
648
- seed = random.randint(0, 65535)
649
- seed_everything(seed)
650
-
651
- if config.save_memory:
652
- self.model.low_vram_shift(is_diffusing=False)
653
-
654
- cond = {
655
- 'c_concat': [control],
656
- 'c_crossattn': [
657
- self.model.get_learned_conditioning(
658
- [prompt + ', ' + a_prompt] * num_samples)
659
- ]
660
- }
661
- un_cond = {
662
- 'c_concat': [control],
663
- 'c_crossattn':
664
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
665
- }
666
- shape = (4, H // 8, W // 8)
667
-
668
- if config.save_memory:
669
- self.model.low_vram_shift(is_diffusing=True)
670
-
671
- samples, intermediates = self.ddim_sampler.sample(
672
- ddim_steps,
673
- num_samples,
674
- shape,
675
- cond,
676
- verbose=False,
677
- eta=eta,
678
- unconditional_guidance_scale=scale,
679
- unconditional_conditioning=un_cond)
680
-
681
- if config.save_memory:
682
- self.model.low_vram_shift(is_diffusing=False)
683
-
684
- x_samples = self.model.decode_first_stage(samples)
685
- x_samples = (
686
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
687
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
688
-
689
- results = [x_samples[i] for i in range(num_samples)]
690
- return [detected_map] + results
691
 
692
  @torch.inference_mode()
693
- def process_normal(self, input_image, prompt, a_prompt, n_prompt,
694
- num_samples, image_resolution, detect_resolution,
695
- ddim_steps, scale, seed, eta, bg_threshold):
696
- self.load_weight('normal')
698
  input_image = HWC3(input_image)
699
- _, detected_map = apply_midas(resize_image(input_image,
700
- detect_resolution),
701
- bg_th=bg_threshold)
702
- detected_map = HWC3(detected_map)
703
- img = resize_image(input_image, image_resolution)
704
- H, W, C = img.shape
705
-
706
- detected_map = cv2.resize(detected_map, (W, H),
707
- interpolation=cv2.INTER_LINEAR)
708
-
709
- control = torch.from_numpy(
710
- detected_map[:, :, ::-1].copy()).float().cuda() / 255.0
711
- control = torch.stack([control for _ in range(num_samples)], dim=0)
712
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
713
-
714
- if seed == -1:
715
- seed = random.randint(0, 65535)
716
- seed_everything(seed)
717
-
718
- if config.save_memory:
719
- self.model.low_vram_shift(is_diffusing=False)
720
-
721
- cond = {
722
- 'c_concat': [control],
723
- 'c_crossattn': [
724
- self.model.get_learned_conditioning(
725
- [prompt + ', ' + a_prompt] * num_samples)
726
- ]
727
- }
728
- un_cond = {
729
- 'c_concat': [control],
730
- 'c_crossattn':
731
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
732
- }
733
- shape = (4, H // 8, W // 8)
734
-
735
- if config.save_memory:
736
- self.model.low_vram_shift(is_diffusing=True)
737
-
738
- samples, intermediates = self.ddim_sampler.sample(
739
- ddim_steps,
740
- num_samples,
741
- shape,
742
- cond,
743
- verbose=False,
744
- eta=eta,
745
- unconditional_guidance_scale=scale,
746
- unconditional_conditioning=un_cond)
747
-
748
- if config.save_memory:
749
- self.model.low_vram_shift(is_diffusing=False)
750
-
751
- x_samples = self.model.decode_first_stage(samples)
752
- x_samples = (
753
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
754
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
755
-
756
- results = [x_samples[i] for i in range(num_samples)]
757
- return [detected_map] + results
 
3
  from __future__ import annotations
4
 
5
  import pathlib
 
 
 
6
  import sys
7
 
8
  import cv2
 
9
  import numpy as np
10
+ import PIL.Image
11
  import torch
12
+ from diffusers import (ControlNetModel, DiffusionPipeline,
13
+ StableDiffusionControlNetPipeline,
14
+ UniPCMultistepScheduler)
15
 
16
+ repo_dir = pathlib.Path(__file__).parent
17
+ submodule_dir = repo_dir / 'ControlNet'
18
+ sys.path.append(submodule_dir.as_posix())
19
 
 
20
  from annotator.canny import apply_canny
21
  from annotator.hed import apply_hed, nms
22
  from annotator.midas import apply_midas
23
  from annotator.mlsd import apply_mlsd
24
  from annotator.openpose import apply_openpose
25
  from annotator.uniformer import apply_uniformer
26
  from annotator.util import HWC3, resize_image
 
 
27
  from share import *
28
 
29
+ CONTROLNET_MODEL_IDS = {
30
+ 'canny': 'lllyasviel/sd-controlnet-canny',
31
+ 'hough': 'lllyasviel/sd-controlnet-mlsd',
32
+ 'hed': 'lllyasviel/sd-controlnet-hed',
33
+ 'scribble': 'lllyasviel/sd-controlnet-scribble',
34
+ 'pose': 'lllyasviel/sd-controlnet-openpose',
35
+ 'seg': 'lllyasviel/sd-controlnet-seg',
36
+ 'depth': 'lllyasviel/sd-controlnet-depth',
37
+ 'normal': 'lllyasviel/sd-controlnet-normal',
38
  }
 
39
 
40
+
41
+ def download_all_controlnet_weights() -> None:
42
+ for model_id in CONTROLNET_MODEL_IDS.values():
43
+ ControlNetModel.from_pretrained(model_id)
44
 
45
 
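The CONTROLNET_MODEL_IDS table above maps each task name used by the demo tabs to a ControlNet checkpoint on the Hub, and download_all_controlnet_weights() just iterates over it. A minimal warm-up sketch, assuming the file shown here is model.py as in the imports earlier in the diff; the __main__ guard and the print are illustrative, not part of the Space:

# Hypothetical warm-up script: pre-fetch every ControlNet checkpoint so the
# first request in the demo does not block on a download.
from model import CONTROLNET_MODEL_IDS, download_all_controlnet_weights

if __name__ == '__main__':
    print(f'fetching {len(CONTROLNET_MODEL_IDS)} ControlNet checkpoints...')
    download_all_controlnet_weights()  # one ControlNetModel.from_pretrained() call per task
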
46
  class Model:
47
  def __init__(self,
48
+ base_model_id: str = 'runwayml/stable-diffusion-v1-5',
49
+ task_name: str = 'canny'):
50
+ self.base_model_id = ''
 
 
 
51
  self.task_name = ''
52
+ self.pipe = self.load_pipe(base_model_id, task_name)
53
+
54
+ def load_pipe(self, base_model_id: str, task_name) -> DiffusionPipeline:
55
+ if base_model_id == self.base_model_id and task_name == self.task_name:
56
+ return self.pipe
57
+ model_id = CONTROLNET_MODEL_IDS[task_name]
58
+ controlnet = ControlNetModel.from_pretrained(model_id,
59
+ torch_dtype=torch.float16)
60
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
61
+ base_model_id,
62
+ safety_checker=None,
63
+ controlnet=controlnet,
64
+ torch_dtype=torch.float16)
65
+ pipe.scheduler = UniPCMultistepScheduler.from_config(
66
+ pipe.scheduler.config)
67
+ pipe.enable_xformers_memory_efficient_attention()
68
+ pipe.enable_model_cpu_offload()
69
+ self.base_model_id = base_model_id
70
+ self.task_name = task_name
71
+ return pipe
72
+
73
+ def set_base_model(self, base_model_id: str) -> str:
74
+ self.pipe = self.load_pipe(base_model_id, self.task_name)
75
+ return self.base_model_id
76
 
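load_pipe() rebuilds the whole StableDiffusionControlNetPipeline whenever the base model or the task changes, and set_base_model() returns whichever base model is loaded afterwards. A short usage sketch; 'andite/anything-v4.0' is the repo id mentioned in the Space description, everything else is illustrative:

# Sketch: swap the Diffusers-format base model at runtime while keeping the
# currently selected ControlNet task ('canny' by default).
from model import Model

model = Model()  # defaults: runwayml/stable-diffusion-v1-5 + 'canny'
current = model.set_base_model('andite/anything-v4.0')
print('base model now in use:', current)
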
77
+ def load_controlnet_weight(self, task_name: str) -> None:
78
  if task_name == self.task_name:
79
  return
80
+ model_id = CONTROLNET_MODEL_IDS[task_name]
81
+ controlnet = ControlNetModel.from_pretrained(model_id,
82
+ torch_dtype=torch.float16)
83
+ from accelerate import cpu_offload_with_hook
84
+ cpu_offload_with_hook(controlnet, torch.device('cuda:0'))
85
+ self.pipe.controlnet = controlnet
86
  self.task_name = task_name
87
 
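load_controlnet_weight() swaps only pipe.controlnet, re-registering the new ControlNet for CPU offload with accelerate's cpu_offload_with_hook, so changing tasks does not reload the base Stable Diffusion weights. A small sketch with illustrative task names:

# Sketch: hot-swap the ControlNet while the base pipeline stays in place.
from model import Model

model = Model(task_name='canny')
model.load_controlnet_weight('depth')  # loads lllyasviel/sd-controlnet-depth
assert model.task_name == 'depth'      # base_model_id is unchanged
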
88
+ def get_prompt(self, prompt: str, additional_prompt: str) -> str:
89
+ if not prompt:
90
+ prompt = additional_prompt
91
+ else:
92
+ prompt = f'{prompt}, {additional_prompt}'
93
+ return prompt
94
+
95
+ def run_pipe(
96
+ self,
97
+ prompt: str,
98
+ negative_prompt: str,
99
+ control_image: PIL.Image.Image,
100
+ num_images: int,
101
+ num_steps: int,
102
+ guidance_scale: float,
103
+ seed: int,
104
+ ) -> list[PIL.Image.Image]:
105
+ generator = torch.Generator().manual_seed(seed)
106
+ return self.pipe(prompt=prompt,
107
+ negative_prompt=negative_prompt,
108
+ guidance_scale=guidance_scale,
109
+ num_images_per_prompt=num_images,
110
+ num_inference_steps=num_steps,
111
+ generator=generator,
112
+ image=control_image).images
113
+
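run_pipe() is the single place where generation happens; the process_* methods below differ only in how they build control_image. For reference, a standalone sketch of the equivalent bare diffusers call, using the same APIs as above; the prompts, the blank control image and the numeric values are placeholders:

# Standalone sketch of what run_pipe() wraps.
import PIL.Image
import torch
from diffusers import (ControlNetModel, StableDiffusionControlNetPipeline,
                       UniPCMultistepScheduler)

controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-canny',
                                             torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    'runwayml/stable-diffusion-v1-5', controlnet=controlnet,
    safety_checker=None, torch_dtype=torch.float16)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

control_image = PIL.Image.new('RGB', (512, 512))  # placeholder edge map
images = pipe(prompt='a house, best quality',
              negative_prompt='lowres, bad anatomy',
              image=control_image,
              num_inference_steps=20,
              guidance_scale=9.0,
              num_images_per_prompt=1,
              generator=torch.Generator().manual_seed(0)).images
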
114
+ @staticmethod
115
+ def preprocess_canny(
116
+ input_image: np.ndarray,
117
+ image_resolution: int,
118
+ low_threshold: int,
119
+ high_threshold: int,
120
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
121
+ image = resize_image(HWC3(input_image), image_resolution)
122
+ control_image = apply_canny(image, low_threshold, high_threshold)
123
+ control_image = HWC3(control_image)
124
+ vis_control_image = 255 - control_image
125
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
126
+ vis_control_image)
127
 
128
  @torch.inference_mode()
129
+ def process_canny(
130
+ self,
131
+ input_image: np.ndarray,
132
+ prompt: str,
133
+ additional_prompt: str,
134
+ negative_prompt: str,
135
+ num_images: int,
136
+ image_resolution: int,
137
+ num_steps: int,
138
+ guidance_scale: float,
139
+ seed: int,
140
+ low_threshold: int,
141
+ high_threshold: int,
142
+ ) -> list[PIL.Image.Image]:
143
+ control_image, vis_control_image = self.preprocess_canny(
144
+ input_image=input_image,
145
+ image_resolution=image_resolution,
146
+ low_threshold=low_threshold,
147
+ high_threshold=high_threshold,
148
+ )
149
+ self.load_controlnet_weight('canny')
150
+ results = self.run_pipe(
151
+ prompt=self.get_prompt(prompt, additional_prompt),
152
+ negative_prompt=negative_prompt,
153
+ control_image=control_image,
154
+ num_images=num_images,
155
+ num_steps=num_steps,
156
+ guidance_scale=guidance_scale,
157
+ seed=seed,
158
+ )
159
+ return [vis_control_image] + results
160
+
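An end-to-end sketch of calling the method above with arguments mirroring the Gradio inputs; the zero-filled array and all values are placeholders:

# Illustrative call; outputs[0] is the inverted Canny map, outputs[1:] the generated images.
import numpy as np
from model import Model

model = Model()
dummy = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in for a user upload
outputs = model.process_canny(dummy,
                              prompt='a modern house',
                              additional_prompt='best quality, extremely detailed',
                              negative_prompt='lowres, bad anatomy, worst quality',
                              num_images=1,
                              image_resolution=512,
                              num_steps=20,
                              guidance_scale=9.0,
                              seed=0,
                              low_threshold=100,
                              high_threshold=200)
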
161
+ @staticmethod
162
+ def preprocess_hough(
163
+ input_image: np.ndarray,
164
+ image_resolution: int,
165
+ detect_resolution: int,
166
+ value_threshold: float,
167
+ distance_threshold: float,
168
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
169
+ input_image = HWC3(input_image)
170
+ control_image = apply_mlsd(
171
+ resize_image(input_image, detect_resolution), value_threshold,
172
+ distance_threshold)
173
+ control_image = HWC3(control_image)
174
+ image = resize_image(input_image, image_resolution)
175
+ H, W = image.shape[:2]
176
+ control_image = cv2.resize(control_image, (W, H),
177
+ interpolation=cv2.INTER_NEAREST)
178
 
179
+ vis_control_image = 255 - cv2.dilate(
180
+ control_image, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
181
 
182
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
183
+ vis_control_image)
184
 
185
  @torch.inference_mode()
186
+ def process_hough(
187
+ self,
188
+ input_image: np.ndarray,
189
+ prompt: str,
190
+ additional_prompt: str,
191
+ negative_prompt: str,
192
+ num_images: int,
193
+ image_resolution: int,
194
+ detect_resolution: int,
195
+ num_steps: int,
196
+ guidance_scale: float,
197
+ seed: int,
198
+ value_threshold: float,
199
+ distance_threshold: float,
200
+ ) -> list[PIL.Image.Image]:
201
+ control_image, vis_control_image = self.preprocess_hough(
202
+ input_image=input_image,
203
+ image_resolution=image_resolution,
204
+ detect_resolution=detect_resolution,
205
+ value_threshold=value_threshold,
206
+ distance_threshold=distance_threshold,
207
+ )
208
+ self.load_controlnet_weight('hough')
209
+ results = self.run_pipe(
210
+ prompt=self.get_prompt(prompt, additional_prompt),
211
+ negative_prompt=negative_prompt,
212
+ control_image=control_image,
213
+ num_images=num_images,
214
+ num_steps=num_steps,
215
+ guidance_scale=guidance_scale,
216
+ seed=seed,
217
+ )
218
+ return [vis_control_image] + results
219
+
220
+ @staticmethod
221
+ def preprocess_hed(
222
+ input_image: np.ndarray,
223
+ image_resolution: int,
224
+ detect_resolution: int,
225
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
226
  input_image = HWC3(input_image)
227
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
228
+ control_image = HWC3(control_image)
229
+ image = resize_image(input_image, image_resolution)
230
+ H, W = image.shape[:2]
231
+ control_image = cv2.resize(control_image, (W, H),
232
+ interpolation=cv2.INTER_LINEAR)
233
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
234
+ control_image)
235
 
236
  @torch.inference_mode()
237
+ def process_hed(
238
+ self,
239
+ input_image: np.ndarray,
240
+ prompt: str,
241
+ additional_prompt: str,
242
+ negative_prompt: str,
243
+ num_images: int,
244
+ image_resolution: int,
245
+ detect_resolution: int,
246
+ num_steps: int,
247
+ guidance_scale: float,
248
+ seed: int,
249
+ ) -> list[PIL.Image.Image]:
250
+ control_image, vis_control_image = self.preprocess_hed(
251
+ input_image=input_image,
252
+ image_resolution=image_resolution,
253
+ detect_resolution=detect_resolution,
254
+ )
255
+ self.load_controlnet_weight('hed')
256
+ results = self.run_pipe(
257
+ prompt=self.get_prompt(prompt, additional_prompt),
258
+ negative_prompt=negative_prompt,
259
+ control_image=control_image,
260
+ num_images=num_images,
261
+ num_steps=num_steps,
262
+ guidance_scale=guidance_scale,
263
+ seed=seed,
264
+ )
265
+ return [vis_control_image] + results
266
+
267
+ @staticmethod
268
+ def preprocess_scribble(
269
+ input_image: np.ndarray,
270
+ image_resolution: int,
271
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
272
+ image = resize_image(HWC3(input_image), image_resolution)
273
+ control_image = np.zeros_like(image, dtype=np.uint8)
274
+ control_image[np.min(image, axis=2) < 127] = 255
275
+ vis_control_image = 255 - control_image
276
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
277
+ vis_control_image)
278
 
279
  @torch.inference_mode()
280
+ def process_scribble(
281
+ self,
282
+ input_image: np.ndarray,
283
+ prompt: str,
284
+ additional_prompt: str,
285
+ negative_prompt: str,
286
+ num_images: int,
287
+ image_resolution: int,
288
+ num_steps: int,
289
+ guidance_scale: float,
290
+ seed: int,
291
+ ) -> list[PIL.Image.Image]:
292
+ control_image, vis_control_image = self.preprocess_scribble(
293
+ input_image=input_image,
294
+ image_resolution=image_resolution,
295
+ )
296
+ self.load_controlnet_weight('scribble')
297
+ results = self.run_pipe(
298
+ prompt=self.get_prompt(prompt, additional_prompt),
299
+ negative_prompt=negative_prompt,
300
+ control_image=control_image,
301
+ num_images=num_images,
302
+ num_steps=num_steps,
303
+ guidance_scale=guidance_scale,
304
+ seed=seed,
305
+ )
306
+ return [vis_control_image] + results
307
+
308
+ @staticmethod
309
+ def preprocess_scribble_interactive(
310
+ input_image: np.ndarray,
311
+ image_resolution: int,
312
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
313
+ image = resize_image(HWC3(input_image['mask'][:, :, 0]),
314
+ image_resolution)
315
+ control_image = np.zeros_like(image, dtype=np.uint8)
316
+ control_image[np.min(image, axis=2) > 127] = 255
317
+ vis_control_image = 255 - control_image
318
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
319
+ vis_control_image)
320
 
321
  @torch.inference_mode()
322
+ def process_scribble_interactive(
323
+ self,
324
+ input_image: np.ndarray,
325
+ prompt: str,
326
+ additional_prompt: str,
327
+ negative_prompt: str,
328
+ num_images: int,
329
+ image_resolution: int,
330
+ num_steps: int,
331
+ guidance_scale: float,
332
+ seed: int,
333
+ ) -> list[PIL.Image.Image]:
334
+ control_image, vis_control_image = self.preprocess_scribble_interactive(
335
+ input_image=input_image,
336
+ image_resolution=image_resolution,
337
+ )
338
+ self.load_controlnet_weight('scribble')
339
+ results = self.run_pipe(
340
+ prompt=self.get_prompt(prompt, additional_prompt),
341
+ negative_prompt=negative_prompt,
342
+ control_image=control_image,
343
+ num_images=num_images,
344
+ num_steps=num_steps,
345
+ guidance_scale=guidance_scale,
346
+ seed=seed,
347
+ )
348
+ return [vis_control_image] + results
349
+
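Unlike the other tasks, the interactive scribble path receives the value of a Gradio sketch component, a dict whose 'mask' entry holds the drawn strokes (only channel 0 is read). A sketch of the assumed input format; the shapes and the extra 'image' key are assumptions:

# Assumed input for the interactive scribble methods, based on the
# input_image['mask'][:, :, 0] access: bright pixels (>127) are strokes.
import numpy as np
from model import Model

mask = np.zeros((512, 512, 4), dtype=np.uint8)
mask[200:210, 100:400] = 255  # one horizontal stroke
sketch_value = {'image': np.zeros((512, 512, 3), dtype=np.uint8), 'mask': mask}

control, vis = Model.preprocess_scribble_interactive(sketch_value,
                                                     image_resolution=512)
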
350
+ @staticmethod
351
+ def preprocess_fake_scribble(
352
+ input_image: np.ndarray,
353
+ image_resolution: int,
354
+ detect_resolution: int,
355
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
356
  input_image = HWC3(input_image)
357
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
358
+ control_image = HWC3(control_image)
359
+ image = resize_image(input_image, image_resolution)
360
+ H, W = image.shape[:2]
361
 
362
+ control_image = cv2.resize(control_image, (W, H),
363
+ interpolation=cv2.INTER_LINEAR)
364
+ control_image = nms(control_image, 127, 3.0)
365
+ control_image = cv2.GaussianBlur(control_image, (0, 0), 3.0)
366
+ control_image[control_image > 4] = 255
367
+ control_image[control_image < 255] = 0
368
 
369
+ vis_control_image = 255 - control_image
370
 
371
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
372
+ vis_control_image)
 
 
 
373
 
374
+ @torch.inference_mode()
375
+ def process_fake_scribble(
376
+ self,
377
+ input_image: np.ndarray,
378
+ prompt: str,
379
+ additional_prompt: str,
380
+ negative_prompt: str,
381
+ num_images: int,
382
+ image_resolution: int,
383
+ detect_resolution: int,
384
+ num_steps: int,
385
+ guidance_scale: float,
386
+ seed: int,
387
+ ) -> list[PIL.Image.Image]:
388
+ control_image, vis_control_image = self.preprocess_fake_scribble(
389
+ input_image=input_image,
390
+ image_resolution=image_resolution,
391
+ detect_resolution=detect_resolution,
392
+ )
393
+ self.load_controlnet_weight('scribble')
394
+ results = self.run_pipe(
395
+ prompt=self.get_prompt(prompt, additional_prompt),
396
+ negative_prompt=negative_prompt,
397
+ control_image=control_image,
398
+ num_images=num_images,
399
+ num_steps=num_steps,
400
+ guidance_scale=guidance_scale,
401
+ seed=seed,
402
+ )
403
+ return [vis_control_image] + results
404
+
405
+ @staticmethod
406
+ def preprocess_pose(
407
+ input_image: np.ndarray,
408
+ image_resolution: int,
409
+ detect_resolution: int,
410
+ is_pose_image: bool,
411
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
412
  input_image = HWC3(input_image)
413
+ if not is_pose_image:
414
+ control_image, _ = apply_openpose(
415
+ resize_image(input_image, detect_resolution))
416
+ control_image = HWC3(control_image)
417
+ image = resize_image(input_image, image_resolution)
418
+ H, W = image.shape[:2]
419
+ control_image = cv2.resize(control_image, (W, H),
420
+ interpolation=cv2.INTER_NEAREST)
421
+ else:
422
+ control_image = input_image
423
+
424
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
425
+ control_image)
426
 
427
  @torch.inference_mode()
428
+ def process_pose(
429
+ self,
430
+ input_image: np.ndarray,
431
+ prompt: str,
432
+ additional_prompt: str,
433
+ negative_prompt: str,
434
+ num_images: int,
435
+ image_resolution: int,
436
+ detect_resolution: int,
437
+ num_steps: int,
438
+ guidance_scale: float,
439
+ seed: int,
440
+ is_pose_image: bool,
441
+ ) -> list[PIL.Image.Image]:
442
+ control_image, vis_control_image = self.preprocess_pose(
443
+ input_image=input_image,
444
+ image_resolution=image_resolution,
445
+ detect_resolution=detect_resolution,
446
+ is_pose_image=is_pose_image,
447
+ )
448
+ self.load_controlnet_weight('pose')
449
+ results = self.run_pipe(
450
+ prompt=self.get_prompt(prompt, additional_prompt),
451
+ negative_prompt=negative_prompt,
452
+ control_image=control_image,
453
+ num_images=num_images,
454
+ num_steps=num_steps,
455
+ guidance_scale=guidance_scale,
456
+ seed=seed,
457
+ )
458
+ return [vis_control_image] + results
459
+
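The new is_pose_image flag lets callers pass an already rendered pose map, which is then used as the control image unchanged (in this branch it is not resized to image_resolution). A sketch with a placeholder pose map and illustrative values:

# Sketch: feed a pre-rendered OpenPose skeleton directly, skipping detection.
import numpy as np
from model import Model

pose_map = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder skeleton rendering
model = Model(task_name='pose')
outputs = model.process_pose(pose_map,
                             prompt='a dancer on stage',
                             additional_prompt='best quality',
                             negative_prompt='lowres, bad anatomy',
                             num_images=1,
                             image_resolution=512,
                             detect_resolution=512,
                             num_steps=20,
                             guidance_scale=9.0,
                             seed=0,
                             is_pose_image=True)
# outputs[0] echoes the pose map; outputs[1:] are the generated images.
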
460
+ @staticmethod
461
+ def preprocess_seg(
462
+ input_image: np.ndarray,
463
+ image_resolution: int,
464
+ detect_resolution: int,
465
+ is_segmentation_map: bool,
466
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
467
  input_image = HWC3(input_image)
468
+ if not is_segmentation_map:
469
+ control_image = apply_uniformer(
470
+ resize_image(input_image, detect_resolution))
471
+ image = resize_image(input_image, image_resolution)
472
+ H, W = image.shape[:2]
473
+ control_image = cv2.resize(control_image, (W, H),
474
+ interpolation=cv2.INTER_NEAREST)
475
+ else:
476
+ control_image = input_image
477
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
478
+ control_image)
479
 
480
  @torch.inference_mode()
481
+ def process_seg(
482
+ self,
483
+ input_image: np.ndarray,
484
+ prompt: str,
485
+ additional_prompt: str,
486
+ negative_prompt: str,
487
+ num_images: int,
488
+ image_resolution: int,
489
+ detect_resolution: int,
490
+ num_steps: int,
491
+ guidance_scale: float,
492
+ seed: int,
493
+ is_segmentation_map: bool,
494
+ ) -> list[PIL.Image.Image]:
495
+ control_image, vis_control_image = self.preprocess_seg(
496
+ input_image=input_image,
497
+ image_resolution=image_resolution,
498
+ detect_resolution=detect_resolution,
499
+ is_segmentation_map=is_segmentation_map,
500
+ )
501
+ self.load_controlnet_weight('seg')
502
+ results = self.run_pipe(
503
+ prompt=self.get_prompt(prompt, additional_prompt),
504
+ negative_prompt=negative_prompt,
505
+ control_image=control_image,
506
+ num_images=num_images,
507
+ num_steps=num_steps,
508
+ guidance_scale=guidance_scale,
509
+ seed=seed,
510
+ )
511
+ return [vis_control_image] + results
512
+
513
+ @staticmethod
514
+ def preprocess_depth(
515
+ input_image: np.ndarray,
516
+ image_resolution: int,
517
+ detect_resolution: int,
518
+ is_depth_image: bool,
519
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
520
+ input_image = HWC3(input_image)
521
+ if not is_depth_image:
522
+ control_image, _ = apply_midas(
523
+ resize_image(input_image, detect_resolution))
524
+ control_image = HWC3(control_image)
525
+ image = resize_image(input_image, image_resolution)
526
+ H, W = image.shape[:2]
527
+ control_image = cv2.resize(control_image, (W, H),
528
+ interpolation=cv2.INTER_LINEAR)
529
+ else:
530
+ control_image = input_image
531
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
532
+ control_image)
533
 
534
+ @torch.inference_mode()
535
+ def process_depth(
536
+ self,
537
+ input_image: np.ndarray,
538
+ prompt: str,
539
+ additional_prompt: str,
540
+ negative_prompt: str,
541
+ num_images: int,
542
+ image_resolution: int,
543
+ detect_resolution: int,
544
+ num_steps: int,
545
+ guidance_scale: float,
546
+ seed: int,
547
+ is_depth_image: bool,
548
+ ) -> list[PIL.Image.Image]:
549
+ control_image, vis_control_image = self.preprocess_depth(
550
+ input_image=input_image,
551
+ image_resolution=image_resolution,
552
+ detect_resolution=detect_resolution,
553
+ is_depth_image=is_depth_image,
554
+ )
555
+ self.load_controlnet_weight('depth')
556
+ results = self.run_pipe(
557
+ prompt=self.get_prompt(prompt, additional_prompt),
558
+ negative_prompt=negative_prompt,
559
+ control_image=control_image,
560
+ num_images=num_images,
561
+ num_steps=num_steps,
562
+ guidance_scale=guidance_scale,
563
+ seed=seed,
564
+ )
565
+ return [vis_control_image] + results
566
+
567
+ @staticmethod
568
+ def preprocess_normal(
569
+ input_image: np.ndarray,
570
+ image_resolution: int,
571
+ detect_resolution: int,
572
+ bg_threshold: float,
573
+ is_normal_image: bool,
574
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
575
  input_image = HWC3(input_image)
576
+ if not is_normal_image:
577
+ _, control_image = apply_midas(resize_image(
578
+ input_image, detect_resolution),
579
+ bg_th=bg_threshold)
580
+ control_image = HWC3(control_image)
581
+ image = resize_image(input_image, image_resolution)
582
+ H, W = image.shape[:2]
583
+ control_image = cv2.resize(control_image, (W, H),
584
+ interpolation=cv2.INTER_LINEAR)
585
+ else:
586
+ control_image = input_image
587
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
588
+ control_image)
589
+
590
+ @torch.inference_mode()
591
+ def process_normal(
592
+ self,
593
+ input_image: np.ndarray,
594
+ prompt: str,
595
+ additional_prompt: str,
596
+ negative_prompt: str,
597
+ num_images: int,
598
+ image_resolution: int,
599
+ detect_resolution: int,
600
+ num_steps: int,
601
+ guidance_scale: float,
602
+ seed: int,
603
+ bg_threshold: float,
604
+ is_normal_image: bool,
605
+ ) -> list[PIL.Image.Image]:
606
+ control_image, vis_control_image = self.preprocess_normal(
607
+ input_image=input_image,
608
+ image_resolution=image_resolution,
609
+ detect_resolution=detect_resolution,
610
+ bg_threshold=bg_threshold,
611
+ is_normal_image=is_normal_image,
612
+ )
613
+ self.load_controlnet_weight('normal')
614
+ results = self.run_pipe(
615
+ prompt=self.get_prompt(prompt, additional_prompt),
616
+ negative_prompt=negative_prompt,
617
+ control_image=control_image,
618
+ num_images=num_images,
619
+ num_steps=num_steps,
620
+ guidance_scale=guidance_scale,
621
+ seed=seed,
622
+ )
623
+ return [vis_control_image] + results
 
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
- gradio==3.18.0
5
- huggingface-hub==0.12.0
 
6
  imageio==2.25.0
7
  imageio-ffmpeg==0.4.8
8
  kornia==0.6.9
 
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
+ git+https://github.com/huggingface/accelerate@78151f8
5
+ git+https://github.com/huggingface/diffusers@fa6d52d
6
+ gradio==3.20.0
7
  imageio==2.25.0
8
  imageio-ffmpeg==0.4.8
9
  kornia==0.6.9