hysts HF staff committed on
Commit
0ae9725
•
1 Parent(s): 83f6448

Update to use diffusers

Browse files
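In short: sampling no longer goes through the vendored ControlNet/DDIM code; model.py now builds a diffusers StableDiffusionControlNetPipeline. A minimal sketch of that pattern (model IDs and API calls are taken from the new model.py in the diff below; the base model choice, prompt, and defaults here are only illustrative):

import PIL.Image
import torch
from diffusers import (ControlNetModel, StableDiffusionControlNetPipeline,
                       UniPCMultistepScheduler)

# ControlNet weights now come from the Hub instead of wget-ed .pth/.safetensors files.
controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-canny',
                                             torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    'andite/anything-v4.0',  # base model hard-coded in the new model.py (marked FIXME there)
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()

control_image = PIL.Image.new('RGB', (512, 512))  # stand-in for a real Canny edge map
images = pipe('a cat', image=control_image, num_inference_steps=20,
              guidance_scale=9.0, generator=torch.Generator().manual_seed(0)).images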
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌖
  colorFrom: pink
  colorTo: blue
  sdk: gradio
- sdk_version: 3.18.0
+ sdk_version: 3.20.0
  python_version: 3.10.9
  app_file: app.py
  pinned: false
gradio_canny2image.py CHANGED
@@ -23,33 +23,33 @@ def create_demo(process, max_images=12):
23
  maximum=768,
24
  value=512,
25
  step=256)
26
- low_threshold = gr.Slider(label='Canny low threshold',
27
- minimum=1,
28
- maximum=255,
29
- value=100,
30
- step=1)
31
- high_threshold = gr.Slider(label='Canny high threshold',
32
- minimum=1,
33
- maximum=255,
34
- value=200,
35
- step=1)
36
- ddim_steps = gr.Slider(label='Steps',
37
- minimum=1,
38
- maximum=100,
39
- value=20,
40
- step=1)
41
- scale = gr.Slider(label='Guidance Scale',
42
- minimum=0.1,
43
- maximum=30.0,
44
- value=9.0,
45
- step=0.1)
 
 
46
  seed = gr.Slider(label='Seed',
47
  minimum=-1,
48
  maximum=2147483647,
49
  step=1,
50
- randomize=True,
51
- queue=False)
52
- eta = gr.Number(label='eta (DDIM)', value=0.0)
53
  a_prompt = gr.Textbox(
54
  label='Added Prompt',
55
  value='best quality, extremely detailed')
@@ -59,17 +59,25 @@ def create_demo(process, max_images=12):
59
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
  )
61
  with gr.Column():
62
- result_gallery = gr.Gallery(label='Output',
63
- show_label=False,
64
- elem_id='gallery').style(
65
- grid=2, height='auto')
66
- ips = [
67
- input_image, prompt, a_prompt, n_prompt, num_samples,
68
- image_resolution, ddim_steps, scale, seed, eta, low_threshold,
69
- high_threshold
 
 
 
 
 
 
 
 
70
  ]
71
  run_button.click(fn=process,
72
- inputs=ips,
73
- outputs=[result_gallery],
74
  api_name='canny')
75
  return demo
 
23
  maximum=768,
24
  value=512,
25
  step=256)
26
+ canny_low_threshold = gr.Slider(
27
+ label='Canny low threshold',
28
+ minimum=1,
29
+ maximum=255,
30
+ value=100,
31
+ step=1)
32
+ canny_high_threshold = gr.Slider(
33
+ label='Canny high threshold',
34
+ minimum=1,
35
+ maximum=255,
36
+ value=200,
37
+ step=1)
38
+ num_steps = gr.Slider(label='Steps',
39
+ minimum=1,
40
+ maximum=100,
41
+ value=20,
42
+ step=1)
43
+ guidance_scale = gr.Slider(label='Guidance Scale',
44
+ minimum=0.1,
45
+ maximum=30.0,
46
+ value=9.0,
47
+ step=0.1)
48
  seed = gr.Slider(label='Seed',
49
  minimum=-1,
50
  maximum=2147483647,
51
  step=1,
52
+ randomize=True)
 
 
53
  a_prompt = gr.Textbox(
54
  label='Added Prompt',
55
  value='best quality, extremely detailed')
 
59
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
  )
61
  with gr.Column():
62
+ result = gr.Gallery(label='Output',
63
+ show_label=False,
64
+ elem_id='gallery').style(grid=2,
65
+ height='auto')
66
+ inputs = [
67
+ input_image,
68
+ prompt,
69
+ a_prompt,
70
+ n_prompt,
71
+ num_samples,
72
+ image_resolution,
73
+ num_steps,
74
+ guidance_scale,
75
+ seed,
76
+ canny_low_threshold,
77
+ canny_high_threshold,
78
  ]
79
  run_button.click(fn=process,
80
+ inputs=inputs,
81
+ outputs=result,
82
  api_name='canny')
83
  return demo
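The other gradio_*.py files below get the same treatment as this one: ddim_steps/scale become num_steps/guidance_scale, the DDIM-only eta field and the queue=False argument are dropped, and the gallery is passed to outputs directly. For context, a demo built this way would presumably be launched from app.py roughly as follows (app.py is not part of this commit, so the wiring below is an assumption):

# Hypothetical wiring; app.py is not shown in this diff.
from gradio_canny2image import create_demo
from model import Model

model = Model()
demo = create_demo(model.process_canny)  # the Model method for this task
demo.queue().launch()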
gradio_depth2image.py CHANGED
@@ -28,23 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=384,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True,
46
- queue=False)
47
- eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,16 +52,24 @@ def create_demo(process, max_images=12):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result_gallery = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(
60
- grid=2, height='auto')
61
- ips = [
62
- input_image, prompt, a_prompt, n_prompt, num_samples,
63
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
65
  run_button.click(fn=process,
66
- inputs=ips,
67
- outputs=[result_gallery],
68
  api_name='depth')
69
  return demo
 
28
  maximum=1024,
29
  value=384,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
  run_button.click(fn=process,
72
+ inputs=inputs,
73
+ outputs=result,
74
  api_name='depth')
75
  return demo
gradio_fake_scribble2image.py CHANGED
@@ -28,23 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True,
46
- queue=False)
47
- eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,16 +52,24 @@ def create_demo(process, max_images=12):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result_gallery = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(
60
- grid=2, height='auto')
61
- ips = [
62
- input_image, prompt, a_prompt, n_prompt, num_samples,
63
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
65
  run_button.click(fn=process,
66
- inputs=ips,
67
- outputs=[result_gallery],
68
  api_name='fake_scribble')
69
  return demo
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
  run_button.click(fn=process,
72
+ inputs=inputs,
73
+ outputs=result,
74
  api_name='fake_scribble')
75
  return demo
gradio_hed2image.py CHANGED
@@ -28,23 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True,
46
- queue=False)
47
- eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,16 +52,24 @@ def create_demo(process, max_images=12):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result_gallery = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(
60
- grid=2, height='auto')
61
- ips = [
62
- input_image, prompt, a_prompt, n_prompt, num_samples,
63
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
65
  run_button.click(fn=process,
66
- inputs=ips,
67
- outputs=[result_gallery],
68
  api_name='hed')
69
  return demo
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
  run_button.click(fn=process,
72
+ inputs=inputs,
73
+ outputs=result,
74
  api_name='hed')
75
  return demo
gradio_hough2image.py CHANGED
@@ -28,35 +28,33 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
- distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
- ddim_steps = gr.Slider(label='Steps',
44
- minimum=1,
45
- maximum=100,
46
- value=20,
47
- step=1)
48
- scale = gr.Slider(label='Guidance Scale',
49
- minimum=0.1,
50
- maximum=30.0,
51
- value=9.0,
52
- step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
- randomize=True,
58
- queue=False)
59
- eta = gr.Number(label='eta (DDIM)', value=0.0)
60
  a_prompt = gr.Textbox(
61
  label='Added Prompt',
62
  value='best quality, extremely detailed')
@@ -66,17 +64,26 @@ def create_demo(process, max_images=12):
66
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
67
  )
68
  with gr.Column():
69
- result_gallery = gr.Gallery(label='Output',
70
- show_label=False,
71
- elem_id='gallery').style(
72
- grid=2, height='auto')
73
- ips = [
74
- input_image, prompt, a_prompt, n_prompt, num_samples,
75
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
76
- value_threshold, distance_threshold
 
 
 
 
 
 
 
 
 
77
  ]
78
  run_button.click(fn=process,
79
- inputs=ips,
80
- outputs=[result_gallery],
81
  api_name='hough')
82
  return demo
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ mlsd_value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
+ mlsd_distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
+ num_steps = gr.Slider(label='Steps',
44
+ minimum=1,
45
+ maximum=100,
46
+ value=20,
47
+ step=1)
48
+ guidance_scale = gr.Slider(label='Guidance Scale',
49
+ minimum=0.1,
50
+ maximum=30.0,
51
+ value=9.0,
52
+ step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
+ randomize=True)
 
 
58
  a_prompt = gr.Textbox(
59
  label='Added Prompt',
60
  value='best quality, extremely detailed')
 
64
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
65
  )
66
  with gr.Column():
67
+ result = gr.Gallery(label='Output',
68
+ show_label=False,
69
+ elem_id='gallery').style(grid=2,
70
+ height='auto')
71
+ inputs = [
72
+ input_image,
73
+ prompt,
74
+ a_prompt,
75
+ n_prompt,
76
+ num_samples,
77
+ image_resolution,
78
+ detect_resolution,
79
+ num_steps,
80
+ guidance_scale,
81
+ seed,
82
+ mlsd_value_threshold,
83
+ mlsd_distance_threshold,
84
  ]
85
  run_button.click(fn=process,
86
+ inputs=inputs,
87
+ outputs=result,
88
  api_name='hough')
89
  return demo
gradio_normal2image.py CHANGED
@@ -34,23 +34,21 @@ def create_demo(process, max_images=12):
34
  maximum=1.0,
35
  value=0.4,
36
  step=0.01)
37
- ddim_steps = gr.Slider(label='Steps',
38
- minimum=1,
39
- maximum=100,
40
- value=20,
41
- step=1)
42
- scale = gr.Slider(label='Guidance Scale',
43
- minimum=0.1,
44
- maximum=30.0,
45
- value=9.0,
46
- step=0.1)
47
  seed = gr.Slider(label='Seed',
48
  minimum=-1,
49
  maximum=2147483647,
50
  step=1,
51
- randomize=True,
52
- queue=False)
53
- eta = gr.Number(label='eta (DDIM)', value=0.0)
54
  a_prompt = gr.Textbox(
55
  label='Added Prompt',
56
  value='best quality, extremely detailed')
@@ -60,17 +58,25 @@ def create_demo(process, max_images=12):
60
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
61
  )
62
  with gr.Column():
63
- result_gallery = gr.Gallery(label='Output',
64
- show_label=False,
65
- elem_id='gallery').style(
66
- grid=2, height='auto')
67
- ips = [
68
- input_image, prompt, a_prompt, n_prompt, num_samples,
69
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
70
- bg_threshold
 
 
 
 
 
 
 
 
71
  ]
72
  run_button.click(fn=process,
73
- inputs=ips,
74
- outputs=[result_gallery],
75
  api_name='normal')
76
  return demo
 
34
  maximum=1.0,
35
  value=0.4,
36
  step=0.01)
37
+ num_steps = gr.Slider(label='Steps',
38
+ minimum=1,
39
+ maximum=100,
40
+ value=20,
41
+ step=1)
42
+ guidance_scale = gr.Slider(label='Guidance Scale',
43
+ minimum=0.1,
44
+ maximum=30.0,
45
+ value=9.0,
46
+ step=0.1)
47
  seed = gr.Slider(label='Seed',
48
  minimum=-1,
49
  maximum=2147483647,
50
  step=1,
51
+ randomize=True)
 
 
52
  a_prompt = gr.Textbox(
53
  label='Added Prompt',
54
  value='best quality, extremely detailed')
 
58
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
59
  )
60
  with gr.Column():
61
+ result = gr.Gallery(label='Output',
62
+ show_label=False,
63
+ elem_id='gallery').style(grid=2,
64
+ height='auto')
65
+ inputs = [
66
+ input_image,
67
+ prompt,
68
+ a_prompt,
69
+ n_prompt,
70
+ num_samples,
71
+ image_resolution,
72
+ detect_resolution,
73
+ num_steps,
74
+ guidance_scale,
75
+ seed,
76
+ bg_threshold,
77
  ]
78
  run_button.click(fn=process,
79
+ inputs=inputs,
80
+ outputs=result,
81
  api_name='normal')
82
  return demo
gradio_pose2image.py CHANGED
@@ -28,23 +28,21 @@ def create_demo(process, max_images=12):
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
- ddim_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True,
46
- queue=False)
47
- eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,16 +52,24 @@ def create_demo(process, max_images=12):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result_gallery = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(
60
- grid=2, height='auto')
61
- ips = [
62
- input_image, prompt, a_prompt, n_prompt, num_samples,
63
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
65
  run_button.click(fn=process,
66
- inputs=ips,
67
- outputs=[result_gallery],
68
  api_name='pose')
69
  return demo
 
28
  maximum=1024,
29
  value=512,
30
  step=1)
31
+ num_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ guidance_scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
 
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
+ result = gr.Gallery(label='Output',
56
+ show_label=False,
57
+ elem_id='gallery').style(grid=2,
58
+ height='auto')
59
+ inputs = [
60
+ input_image,
61
+ prompt,
62
+ a_prompt,
63
+ n_prompt,
64
+ num_samples,
65
+ image_resolution,
66
+ detect_resolution,
67
+ num_steps,
68
+ guidance_scale,
69
+ seed,
70
  ]
71
  run_button.click(fn=process,
72
+ inputs=inputs,
73
+ outputs=result,
74
  api_name='pose')
75
  return demo
gradio_scribble2image.py CHANGED
@@ -23,23 +23,21 @@ def create_demo(process, max_images=12):
23
  maximum=768,
24
  value=512,
25
  step=256)
26
- ddim_steps = gr.Slider(label='Steps',
27
- minimum=1,
28
- maximum=100,
29
- value=20,
30
- step=1)
31
- scale = gr.Slider(label='Guidance Scale',
32
- minimum=0.1,
33
- maximum=30.0,
34
- value=9.0,
35
- step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
- randomize=True,
41
- queue=False)
42
- eta = gr.Number(label='eta (DDIM)', value=0.0)
43
  a_prompt = gr.Textbox(
44
  label='Added Prompt',
45
  value='best quality, extremely detailed')
@@ -49,16 +47,23 @@ def create_demo(process, max_images=12):
49
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
50
  )
51
  with gr.Column():
52
- result_gallery = gr.Gallery(label='Output',
53
- show_label=False,
54
- elem_id='gallery').style(
55
- grid=2, height='auto')
56
- ips = [
57
- input_image, prompt, a_prompt, n_prompt, num_samples,
58
- image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
59
  ]
60
  run_button.click(fn=process,
61
- inputs=ips,
62
- outputs=[result_gallery],
63
  api_name='scribble')
64
  return demo
 
23
  maximum=768,
24
  value=512,
25
  step=256)
26
+ num_steps = gr.Slider(label='Steps',
27
+ minimum=1,
28
+ maximum=100,
29
+ value=20,
30
+ step=1)
31
+ guidance_scale = gr.Slider(label='Guidance Scale',
32
+ minimum=0.1,
33
+ maximum=30.0,
34
+ value=9.0,
35
+ step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
+ randomize=True)
 
 
41
  a_prompt = gr.Textbox(
42
  label='Added Prompt',
43
  value='best quality, extremely detailed')
 
47
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
48
  )
49
  with gr.Column():
50
+ result = gr.Gallery(label='Output',
51
+ show_label=False,
52
+ elem_id='gallery').style(grid=2,
53
+ height='auto')
54
+ inputs = [
55
+ input_image,
56
+ prompt,
57
+ a_prompt,
58
+ n_prompt,
59
+ num_samples,
60
+ image_resolution,
61
+ num_steps,
62
+ guidance_scale,
63
+ seed,
64
  ]
65
  run_button.click(fn=process,
66
+ inputs=inputs,
67
+ outputs=result,
68
  api_name='scribble')
69
  return demo
gradio_scribble2image_interactive.py CHANGED
@@ -37,7 +37,7 @@ def create_demo(process, max_images=12):
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
- outputs=[input_image],
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
@@ -52,23 +52,21 @@ def create_demo(process, max_images=12):
52
  maximum=768,
53
  value=512,
54
  step=256)
55
- ddim_steps = gr.Slider(label='Steps',
56
- minimum=1,
57
- maximum=100,
58
- value=20,
59
- step=1)
60
- scale = gr.Slider(label='Guidance Scale',
61
- minimum=0.1,
62
- maximum=30.0,
63
- value=9.0,
64
- step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
- randomize=True,
70
- queue=False)
71
- eta = gr.Number(label='eta (DDIM)', value=0.0)
72
  a_prompt = gr.Textbox(
73
  label='Added Prompt',
74
  value='best quality, extremely detailed')
@@ -78,13 +76,20 @@ def create_demo(process, max_images=12):
78
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
79
  )
80
  with gr.Column():
81
- result_gallery = gr.Gallery(label='Output',
82
- show_label=False,
83
- elem_id='gallery').style(
84
- grid=2, height='auto')
85
- ips = [
86
- input_image, prompt, a_prompt, n_prompt, num_samples,
87
- image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
88
  ]
89
- run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
90
  return demo
 
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
+ outputs=input_image,
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
 
52
  maximum=768,
53
  value=512,
54
  step=256)
55
+ num_steps = gr.Slider(label='Steps',
56
+ minimum=1,
57
+ maximum=100,
58
+ value=20,
59
+ step=1)
60
+ guidance_scale = gr.Slider(label='Guidance Scale',
61
+ minimum=0.1,
62
+ maximum=30.0,
63
+ value=9.0,
64
+ step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
+ randomize=True)
 
 
70
  a_prompt = gr.Textbox(
71
  label='Added Prompt',
72
  value='best quality, extremely detailed')
 
76
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
77
  )
78
  with gr.Column():
79
+ result = gr.Gallery(label='Output',
80
+ show_label=False,
81
+ elem_id='gallery').style(grid=2,
82
+ height='auto')
83
+ inputs = [
84
+ input_image,
85
+ prompt,
86
+ a_prompt,
87
+ n_prompt,
88
+ num_samples,
89
+ image_resolution,
90
+ num_steps,
91
+ guidance_scale,
92
+ seed,
93
  ]
94
+ run_button.click(fn=process, inputs=inputs, outputs=result)
95
  return demo
gradio_seg2image.py CHANGED
@@ -29,23 +29,21 @@ def create_demo(process, max_images=12):
29
  maximum=1024,
30
  value=512,
31
  step=1)
32
- ddim_steps = gr.Slider(label='Steps',
33
- minimum=1,
34
- maximum=100,
35
- value=20,
36
- step=1)
37
- scale = gr.Slider(label='Guidance Scale',
38
- minimum=0.1,
39
- maximum=30.0,
40
- value=9.0,
41
- step=0.1)
42
  seed = gr.Slider(label='Seed',
43
  minimum=-1,
44
  maximum=2147483647,
45
  step=1,
46
- randomize=True,
47
- queue=False)
48
- eta = gr.Number(label='eta (DDIM)', value=0.0)
49
  a_prompt = gr.Textbox(
50
  label='Added Prompt',
51
  value='best quality, extremely detailed')
@@ -55,16 +53,24 @@ def create_demo(process, max_images=12):
55
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
56
  )
57
  with gr.Column():
58
- result_gallery = gr.Gallery(label='Output',
59
- show_label=False,
60
- elem_id='gallery').style(
61
- grid=2, height='auto')
62
- ips = [
63
- input_image, prompt, a_prompt, n_prompt, num_samples,
64
- image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
65
  ]
66
  run_button.click(fn=process,
67
- inputs=ips,
68
- outputs=[result_gallery],
69
  api_name='seg')
70
  return demo
 
29
  maximum=1024,
30
  value=512,
31
  step=1)
32
+ num_steps = gr.Slider(label='Steps',
33
+ minimum=1,
34
+ maximum=100,
35
+ value=20,
36
+ step=1)
37
+ guidance_scale = gr.Slider(label='Guidance Scale',
38
+ minimum=0.1,
39
+ maximum=30.0,
40
+ value=9.0,
41
+ step=0.1)
42
  seed = gr.Slider(label='Seed',
43
  minimum=-1,
44
  maximum=2147483647,
45
  step=1,
46
+ randomize=True)
 
 
47
  a_prompt = gr.Textbox(
48
  label='Added Prompt',
49
  value='best quality, extremely detailed')
 
53
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
54
  )
55
  with gr.Column():
56
+ result = gr.Gallery(label='Output',
57
+ show_label=False,
58
+ elem_id='gallery').style(grid=2,
59
+ height='auto')
60
+ inputs = [
61
+ input_image,
62
+ prompt,
63
+ a_prompt,
64
+ n_prompt,
65
+ num_samples,
66
+ image_resolution,
67
+ detect_resolution,
68
+ num_steps,
69
+ guidance_scale,
70
+ seed,
71
  ]
72
  run_button.click(fn=process,
73
+ inputs=inputs,
74
+ outputs=result,
75
  api_name='seg')
76
  return demo
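model.py below carries the substance of the commit: the per-task process_* methods no longer hand-build a DDIM sampler; they funnel through shared preprocess_*/process helpers, and seeding moves from pytorch_lightning's seed_everything to a torch.Generator. A sketch restating the new run_pipe call from the diff below (names mirror its parameters; nothing beyond the diff is assumed):

import torch
from diffusers import StableDiffusionControlNetPipeline


def run_pipe(pipe: StableDiffusionControlNetPipeline, prompt: str,
             negative_prompt: str, control_image, num_images: int,
             num_steps: int, guidance_scale: float, seed: int):
    # One shared diffusers call replaces the per-task DDIM sampling loops.
    generator = torch.Generator().manual_seed(seed)
    return pipe(prompt=prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                num_images_per_prompt=num_images,
                num_inference_steps=num_steps,
                generator=generator,
                image=control_image).images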
model.py CHANGED
@@ -3,20 +3,20 @@
3
  from __future__ import annotations
4
 
5
  import pathlib
6
- import random
7
- import shlex
8
- import subprocess
9
  import sys
10
 
11
  import cv2
12
- import einops
13
  import numpy as np
 
14
  import torch
15
- from pytorch_lightning import seed_everything
 
 
16
 
17
- sys.path.append('ControlNet')
 
 
18
 
19
- import config
20
  from annotator.canny import apply_canny
21
  from annotator.hed import apply_hed, nms
22
  from annotator.midas import apply_midas
@@ -24,743 +24,594 @@ from annotator.mlsd import apply_mlsd
24
  from annotator.openpose import apply_openpose
25
  from annotator.uniformer import apply_uniformer
26
  from annotator.util import HWC3, resize_image
27
- from cldm.model import create_model, load_state_dict
28
- from ldm.models.diffusion.ddim import DDIMSampler
29
  from share import *
30
 
31
- ORIGINAL_MODEL_NAMES = {
32
- 'canny': 'control_sd15_canny.pth',
33
- 'hough': 'control_sd15_mlsd.pth',
34
- 'hed': 'control_sd15_hed.pth',
35
- 'scribble': 'control_sd15_scribble.pth',
36
- 'pose': 'control_sd15_openpose.pth',
37
- 'seg': 'control_sd15_seg.pth',
38
- 'depth': 'control_sd15_depth.pth',
39
- 'normal': 'control_sd15_normal.pth',
40
  }
41
- ORIGINAL_WEIGHT_ROOT = 'https://huggingface.co/lllyasviel/ControlNet/resolve/main/models/'
42
-
43
- LIGHTWEIGHT_MODEL_NAMES = {
44
- 'canny': 'control_canny-fp16.safetensors',
45
- 'hough': 'control_mlsd-fp16.safetensors',
46
- 'hed': 'control_hed-fp16.safetensors',
47
- 'scribble': 'control_scribble-fp16.safetensors',
48
- 'pose': 'control_openpose-fp16.safetensors',
49
- 'seg': 'control_seg-fp16.safetensors',
50
- 'depth': 'control_depth-fp16.safetensors',
51
- 'normal': 'control_normal-fp16.safetensors',
52
- }
53
- LIGHTWEIGHT_WEIGHT_ROOT = 'https://huggingface.co/webui/ControlNet-modules-safetensors/resolve/main/'
54
 
55
 
56
  class Model:
57
- def __init__(self,
58
- model_config_path: str = 'ControlNet/models/cldm_v15.yaml',
59
- model_dir: str = 'models',
60
- use_lightweight: bool = True):
61
- self.device = torch.device(
62
- 'cuda:0' if torch.cuda.is_available() else 'cpu')
63
- self.model = create_model(model_config_path).to(self.device)
64
- self.ddim_sampler = DDIMSampler(self.model)
65
- self.task_name = ''
66
-
67
- self.model_dir = pathlib.Path(model_dir)
68
- self.model_dir.mkdir(exist_ok=True, parents=True)
69
-
70
- self.use_lightweight = use_lightweight
71
- if use_lightweight:
72
- self.model_names = LIGHTWEIGHT_MODEL_NAMES
73
- self.weight_root = LIGHTWEIGHT_WEIGHT_ROOT
74
- base_model_url = 'https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors'
75
- self.load_base_model(base_model_url)
76
- else:
77
- self.model_names = ORIGINAL_MODEL_NAMES
78
- self.weight_root = ORIGINAL_WEIGHT_ROOT
79
-
80
- self.download_models()
81
-
82
- def download_base_model(self, model_url: str) -> pathlib.Path:
83
- model_name = model_url.split('/')[-1]
84
- out_path = self.model_dir / model_name
85
- if not out_path.exists():
86
- subprocess.run(shlex.split(f'wget {model_url} -O {out_path}'))
87
- return out_path
88
-
89
- def load_base_model(self, model_url: str) -> None:
90
- model_path = self.download_base_model(model_url)
91
- self.model.load_state_dict(load_state_dict(model_path,
92
- location=self.device.type),
93
- strict=False)
94
-
95
- def load_weight(self, task_name: str) -> None:
96
  if task_name == self.task_name:
97
  return
98
- weight_path = self.get_weight_path(task_name)
99
- if not self.use_lightweight:
100
- self.model.load_state_dict(
101
- load_state_dict(weight_path, location=self.device))
102
- else:
103
- self.model.control_model.load_state_dict(
104
- load_state_dict(weight_path, location=self.device.type))
105
  self.task_name = task_name
106
 
107
- def get_weight_path(self, task_name: str) -> str:
108
- if 'scribble' in task_name:
109
- task_name = 'scribble'
110
- return f'{self.model_dir}/{self.model_names[task_name]}'
111
-
112
- def download_models(self) -> None:
113
- self.model_dir.mkdir(exist_ok=True, parents=True)
114
- for name in self.model_names.values():
115
- out_path = self.model_dir / name
116
- if out_path.exists():
117
- continue
118
- subprocess.run(
119
- shlex.split(f'wget {self.weight_root}{name} -O {out_path}'))
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  @torch.inference_mode()
122
- def process_canny(self, input_image, prompt, a_prompt, n_prompt,
123
- num_samples, image_resolution, ddim_steps, scale, seed,
124
- eta, low_threshold, high_threshold):
125
- self.load_weight('canny')
126
-
127
- img = resize_image(HWC3(input_image), image_resolution)
128
- H, W, C = img.shape
129
-
130
- detected_map = apply_canny(img, low_threshold, high_threshold)
131
- detected_map = HWC3(detected_map)
132
-
133
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
134
- control = torch.stack([control for _ in range(num_samples)], dim=0)
135
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
136
-
137
- if seed == -1:
138
- seed = random.randint(0, 65535)
139
- seed_everything(seed)
140
-
141
- if config.save_memory:
142
- self.model.low_vram_shift(is_diffusing=False)
143
-
144
- cond = {
145
- 'c_concat': [control],
146
- 'c_crossattn': [
147
- self.model.get_learned_conditioning(
148
- [prompt + ', ' + a_prompt] * num_samples)
149
- ]
150
- }
151
- un_cond = {
152
- 'c_concat': [control],
153
- 'c_crossattn':
154
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
155
- }
156
- shape = (4, H // 8, W // 8)
157
-
158
- if config.save_memory:
159
- self.model.low_vram_shift(is_diffusing=True)
160
-
161
- samples, intermediates = self.ddim_sampler.sample(
162
- ddim_steps,
163
- num_samples,
164
- shape,
165
- cond,
166
- verbose=False,
167
- eta=eta,
168
- unconditional_guidance_scale=scale,
169
- unconditional_conditioning=un_cond)
170
-
171
- if config.save_memory:
172
- self.model.low_vram_shift(is_diffusing=False)
173
-
174
- x_samples = self.model.decode_first_stage(samples)
175
- x_samples = (
176
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
177
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
178
-
179
- results = [x_samples[i] for i in range(num_samples)]
180
- return [255 - detected_map] + results
181
 
182
- @torch.inference_mode()
183
- def process_hough(self, input_image, prompt, a_prompt, n_prompt,
184
- num_samples, image_resolution, detect_resolution,
185
- ddim_steps, scale, seed, eta, value_threshold,
186
- distance_threshold):
187
- self.load_weight('hough')
188
 
189
- input_image = HWC3(input_image)
190
- detected_map = apply_mlsd(resize_image(input_image, detect_resolution),
191
- value_threshold, distance_threshold)
192
- detected_map = HWC3(detected_map)
193
- img = resize_image(input_image, image_resolution)
194
- H, W, C = img.shape
195
-
196
- detected_map = cv2.resize(detected_map, (W, H),
197
- interpolation=cv2.INTER_NEAREST)
198
-
199
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
200
- control = torch.stack([control for _ in range(num_samples)], dim=0)
201
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
202
-
203
- if seed == -1:
204
- seed = random.randint(0, 65535)
205
- seed_everything(seed)
206
-
207
- if config.save_memory:
208
- self.model.low_vram_shift(is_diffusing=False)
209
-
210
- cond = {
211
- 'c_concat': [control],
212
- 'c_crossattn': [
213
- self.model.get_learned_conditioning(
214
- [prompt + ', ' + a_prompt] * num_samples)
215
- ]
216
- }
217
- un_cond = {
218
- 'c_concat': [control],
219
- 'c_crossattn':
220
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
221
- }
222
- shape = (4, H // 8, W // 8)
223
-
224
- if config.save_memory:
225
- self.model.low_vram_shift(is_diffusing=True)
226
-
227
- samples, intermediates = self.ddim_sampler.sample(
228
- ddim_steps,
229
- num_samples,
230
- shape,
231
- cond,
232
- verbose=False,
233
- eta=eta,
234
- unconditional_guidance_scale=scale,
235
- unconditional_conditioning=un_cond)
236
-
237
- if config.save_memory:
238
- self.model.low_vram_shift(is_diffusing=False)
239
-
240
- x_samples = self.model.decode_first_stage(samples)
241
- x_samples = (
242
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
243
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
244
-
245
- results = [x_samples[i] for i in range(num_samples)]
246
- return [
247
- 255 - cv2.dilate(detected_map,
248
- np.ones(shape=(3, 3), dtype=np.uint8),
249
- iterations=1)
250
- ] + results
251
 
252
  @torch.inference_mode()
253
- def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples,
254
- image_resolution, detect_resolution, ddim_steps, scale,
255
- seed, eta):
256
- self.load_weight('hed')
257
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  input_image = HWC3(input_image)
259
- detected_map = apply_hed(resize_image(input_image, detect_resolution))
260
- detected_map = HWC3(detected_map)
261
- img = resize_image(input_image, image_resolution)
262
- H, W, C = img.shape
263
-
264
- detected_map = cv2.resize(detected_map, (W, H),
265
- interpolation=cv2.INTER_LINEAR)
266
-
267
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
268
- control = torch.stack([control for _ in range(num_samples)], dim=0)
269
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
270
-
271
- if seed == -1:
272
- seed = random.randint(0, 65535)
273
- seed_everything(seed)
274
-
275
- if config.save_memory:
276
- self.model.low_vram_shift(is_diffusing=False)
277
-
278
- cond = {
279
- 'c_concat': [control],
280
- 'c_crossattn': [
281
- self.model.get_learned_conditioning(
282
- [prompt + ', ' + a_prompt] * num_samples)
283
- ]
284
- }
285
- un_cond = {
286
- 'c_concat': [control],
287
- 'c_crossattn':
288
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
289
- }
290
- shape = (4, H // 8, W // 8)
291
-
292
- if config.save_memory:
293
- self.model.low_vram_shift(is_diffusing=True)
294
-
295
- samples, intermediates = self.ddim_sampler.sample(
296
- ddim_steps,
297
- num_samples,
298
- shape,
299
- cond,
300
- verbose=False,
301
- eta=eta,
302
- unconditional_guidance_scale=scale,
303
- unconditional_conditioning=un_cond)
304
-
305
- if config.save_memory:
306
- self.model.low_vram_shift(is_diffusing=False)
307
-
308
- x_samples = self.model.decode_first_stage(samples)
309
- x_samples = (
310
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
311
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
312
-
313
- results = [x_samples[i] for i in range(num_samples)]
314
- return [detected_map] + results
315
 
316
  @torch.inference_mode()
317
- def process_scribble(self, input_image, prompt, a_prompt, n_prompt,
318
- num_samples, image_resolution, ddim_steps, scale,
319
- seed, eta):
320
- self.load_weight('scribble')
321
-
322
- img = resize_image(HWC3(input_image), image_resolution)
323
- H, W, C = img.shape
324
-
325
- detected_map = np.zeros_like(img, dtype=np.uint8)
326
- detected_map[np.min(img, axis=2) < 127] = 255
327
-
328
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
329
- control = torch.stack([control for _ in range(num_samples)], dim=0)
330
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
331
-
332
- if seed == -1:
333
- seed = random.randint(0, 65535)
334
- seed_everything(seed)
335
-
336
- if config.save_memory:
337
- self.model.low_vram_shift(is_diffusing=False)
338
-
339
- cond = {
340
- 'c_concat': [control],
341
- 'c_crossattn': [
342
- self.model.get_learned_conditioning(
343
- [prompt + ', ' + a_prompt] * num_samples)
344
- ]
345
- }
346
- un_cond = {
347
- 'c_concat': [control],
348
- 'c_crossattn':
349
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
350
- }
351
- shape = (4, H // 8, W // 8)
352
-
353
- if config.save_memory:
354
- self.model.low_vram_shift(is_diffusing=True)
355
-
356
- samples, intermediates = self.ddim_sampler.sample(
357
- ddim_steps,
358
- num_samples,
359
- shape,
360
- cond,
361
- verbose=False,
362
- eta=eta,
363
- unconditional_guidance_scale=scale,
364
- unconditional_conditioning=un_cond)
365
-
366
- if config.save_memory:
367
- self.model.low_vram_shift(is_diffusing=False)
368
-
369
- x_samples = self.model.decode_first_stage(samples)
370
- x_samples = (
371
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
372
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
373
-
374
- results = [x_samples[i] for i in range(num_samples)]
375
- return [255 - detected_map] + results
376
 
377
  @torch.inference_mode()
378
- def process_scribble_interactive(self, input_image, prompt, a_prompt,
379
- n_prompt, num_samples, image_resolution,
380
- ddim_steps, scale, seed, eta):
381
- self.load_weight('scribble')
382
-
383
- img = resize_image(HWC3(input_image['mask'][:, :, 0]),
384
- image_resolution)
385
- H, W, C = img.shape
386
-
387
- detected_map = np.zeros_like(img, dtype=np.uint8)
388
- detected_map[np.min(img, axis=2) > 127] = 255
389
-
390
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
391
- control = torch.stack([control for _ in range(num_samples)], dim=0)
392
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
393
-
394
- if seed == -1:
395
- seed = random.randint(0, 65535)
396
- seed_everything(seed)
397
-
398
- if config.save_memory:
399
- self.model.low_vram_shift(is_diffusing=False)
400
-
401
- cond = {
402
- 'c_concat': [control],
403
- 'c_crossattn': [
404
- self.model.get_learned_conditioning(
405
- [prompt + ', ' + a_prompt] * num_samples)
406
- ]
407
- }
408
- un_cond = {
409
- 'c_concat': [control],
410
- 'c_crossattn':
411
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
412
- }
413
- shape = (4, H // 8, W // 8)
414
-
415
- if config.save_memory:
416
- self.model.low_vram_shift(is_diffusing=True)
417
-
418
- samples, intermediates = self.ddim_sampler.sample(
419
- ddim_steps,
420
- num_samples,
421
- shape,
422
- cond,
423
- verbose=False,
424
- eta=eta,
425
- unconditional_guidance_scale=scale,
426
- unconditional_conditioning=un_cond)
427
-
428
- if config.save_memory:
429
- self.model.low_vram_shift(is_diffusing=False)
430
-
431
- x_samples = self.model.decode_first_stage(samples)
432
- x_samples = (
433
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
434
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
435
-
436
- results = [x_samples[i] for i in range(num_samples)]
437
- return [255 - detected_map] + results
438
 
439
  @torch.inference_mode()
440
- def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt,
441
- num_samples, image_resolution, detect_resolution,
442
- ddim_steps, scale, seed, eta):
443
- self.load_weight('scribble')
444
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  input_image = HWC3(input_image)
446
- detected_map = apply_hed(resize_image(input_image, detect_resolution))
447
- detected_map = HWC3(detected_map)
448
- img = resize_image(input_image, image_resolution)
449
- H, W, C = img.shape
450
-
451
- detected_map = cv2.resize(detected_map, (W, H),
452
- interpolation=cv2.INTER_LINEAR)
453
- detected_map = nms(detected_map, 127, 3.0)
454
- detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
455
- detected_map[detected_map > 4] = 255
456
- detected_map[detected_map < 255] = 0
457
-
458
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
459
- control = torch.stack([control for _ in range(num_samples)], dim=0)
460
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
461
-
462
- if seed == -1:
463
- seed = random.randint(0, 65535)
464
- seed_everything(seed)
465
-
466
- if config.save_memory:
467
- self.model.low_vram_shift(is_diffusing=False)
468
-
469
- cond = {
470
- 'c_concat': [control],
471
- 'c_crossattn': [
472
- self.model.get_learned_conditioning(
473
- [prompt + ', ' + a_prompt] * num_samples)
474
- ]
475
- }
476
- un_cond = {
477
- 'c_concat': [control],
478
- 'c_crossattn':
479
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
480
- }
481
- shape = (4, H // 8, W // 8)
482
-
483
- if config.save_memory:
484
- self.model.low_vram_shift(is_diffusing=True)
485
-
486
- samples, intermediates = self.ddim_sampler.sample(
487
- ddim_steps,
488
- num_samples,
489
- shape,
490
- cond,
491
- verbose=False,
492
- eta=eta,
493
- unconditional_guidance_scale=scale,
494
- unconditional_conditioning=un_cond)
495
-
496
- if config.save_memory:
497
- self.model.low_vram_shift(is_diffusing=False)
498
-
499
- x_samples = self.model.decode_first_stage(samples)
500
- x_samples = (
501
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
502
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
503
-
504
- results = [x_samples[i] for i in range(num_samples)]
505
- return [255 - detected_map] + results
506
 
507
- @torch.inference_mode()
508
- def process_pose(self, input_image, prompt, a_prompt, n_prompt,
509
- num_samples, image_resolution, detect_resolution,
510
- ddim_steps, scale, seed, eta):
511
- self.load_weight('pose')
 
 
 
512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  input_image = HWC3(input_image)
514
- detected_map, _ = apply_openpose(
515
  resize_image(input_image, detect_resolution))
516
- detected_map = HWC3(detected_map)
517
- img = resize_image(input_image, image_resolution)
518
- H, W, C = img.shape
519
-
520
- detected_map = cv2.resize(detected_map, (W, H),
521
- interpolation=cv2.INTER_NEAREST)
522
-
523
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
524
- control = torch.stack([control for _ in range(num_samples)], dim=0)
525
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
526
-
527
- if seed == -1:
528
- seed = random.randint(0, 65535)
529
- seed_everything(seed)
530
-
531
- if config.save_memory:
532
- self.model.low_vram_shift(is_diffusing=False)
533
-
534
- cond = {
535
- 'c_concat': [control],
536
- 'c_crossattn': [
537
- self.model.get_learned_conditioning(
538
- [prompt + ', ' + a_prompt] * num_samples)
539
- ]
540
- }
541
- un_cond = {
542
- 'c_concat': [control],
543
- 'c_crossattn':
544
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
545
- }
546
- shape = (4, H // 8, W // 8)
547
-
548
- if config.save_memory:
549
- self.model.low_vram_shift(is_diffusing=True)
550
-
551
- samples, intermediates = self.ddim_sampler.sample(
552
- ddim_steps,
553
- num_samples,
554
- shape,
555
- cond,
556
- verbose=False,
557
- eta=eta,
558
- unconditional_guidance_scale=scale,
559
- unconditional_conditioning=un_cond)
560
-
561
- if config.save_memory:
562
- self.model.low_vram_shift(is_diffusing=False)
563
-
564
- x_samples = self.model.decode_first_stage(samples)
565
- x_samples = (
566
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
567
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
568
-
569
- results = [x_samples[i] for i in range(num_samples)]
570
- return [detected_map] + results
571
 
572
- @torch.inference_mode()
573
- def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples,
574
- image_resolution, detect_resolution, ddim_steps, scale,
575
- seed, eta):
576
- self.load_weight('seg')
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  input_image = HWC3(input_image)
579
- detected_map = apply_uniformer(
580
  resize_image(input_image, detect_resolution))
581
- img = resize_image(input_image, image_resolution)
582
- H, W, C = img.shape
583
-
584
- detected_map = cv2.resize(detected_map, (W, H),
585
- interpolation=cv2.INTER_NEAREST)
586
-
587
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
588
- control = torch.stack([control for _ in range(num_samples)], dim=0)
589
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
590
-
591
- if seed == -1:
592
- seed = random.randint(0, 65535)
593
- seed_everything(seed)
594
-
595
- if config.save_memory:
596
- self.model.low_vram_shift(is_diffusing=False)
597
-
598
- cond = {
599
- 'c_concat': [control],
600
- 'c_crossattn': [
601
- self.model.get_learned_conditioning(
602
- [prompt + ', ' + a_prompt] * num_samples)
603
- ]
604
- }
605
- un_cond = {
606
- 'c_concat': [control],
607
- 'c_crossattn':
608
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
609
- }
610
- shape = (4, H // 8, W // 8)
611
-
612
- if config.save_memory:
613
- self.model.low_vram_shift(is_diffusing=True)
614
-
615
- samples, intermediates = self.ddim_sampler.sample(
616
- ddim_steps,
617
- num_samples,
618
- shape,
619
- cond,
620
- verbose=False,
621
- eta=eta,
622
- unconditional_guidance_scale=scale,
623
- unconditional_conditioning=un_cond)
624
-
625
- if config.save_memory:
626
- self.model.low_vram_shift(is_diffusing=False)
627
-
628
- x_samples = self.model.decode_first_stage(samples)
629
- x_samples = (
630
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
631
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
632
-
633
- results = [x_samples[i] for i in range(num_samples)]
634
- return [detected_map] + results
635
 
636
  @torch.inference_mode()
637
- def process_depth(self, input_image, prompt, a_prompt, n_prompt,
638
- num_samples, image_resolution, detect_resolution,
639
- ddim_steps, scale, seed, eta):
640
- self.load_weight('depth')
641
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  input_image = HWC3(input_image)
643
- detected_map, _ = apply_midas(
644
  resize_image(input_image, detect_resolution))
645
- detected_map = HWC3(detected_map)
646
- img = resize_image(input_image, image_resolution)
647
- H, W, C = img.shape
648
-
649
- detected_map = cv2.resize(detected_map, (W, H),
650
- interpolation=cv2.INTER_LINEAR)
651
-
652
- control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
653
- control = torch.stack([control for _ in range(num_samples)], dim=0)
654
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
655
-
656
- if seed == -1:
657
- seed = random.randint(0, 65535)
658
- seed_everything(seed)
659
-
660
- if config.save_memory:
661
- self.model.low_vram_shift(is_diffusing=False)
662
-
663
- cond = {
664
- 'c_concat': [control],
665
- 'c_crossattn': [
666
- self.model.get_learned_conditioning(
667
- [prompt + ', ' + a_prompt] * num_samples)
668
- ]
669
- }
670
- un_cond = {
671
- 'c_concat': [control],
672
- 'c_crossattn':
673
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
674
- }
675
- shape = (4, H // 8, W // 8)
676
-
677
- if config.save_memory:
678
- self.model.low_vram_shift(is_diffusing=True)
679
-
680
- samples, intermediates = self.ddim_sampler.sample(
681
- ddim_steps,
682
- num_samples,
683
- shape,
684
- cond,
685
- verbose=False,
686
- eta=eta,
687
- unconditional_guidance_scale=scale,
688
- unconditional_conditioning=un_cond)
689
-
690
- if config.save_memory:
691
- self.model.low_vram_shift(is_diffusing=False)
692
-
693
- x_samples = self.model.decode_first_stage(samples)
694
- x_samples = (
695
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
696
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
697
-
698
- results = [x_samples[i] for i in range(num_samples)]
699
- return [detected_map] + results
700
 
701
  @torch.inference_mode()
702
- def process_normal(self, input_image, prompt, a_prompt, n_prompt,
703
- num_samples, image_resolution, detect_resolution,
704
- ddim_steps, scale, seed, eta, bg_threshold):
705
- self.load_weight('normal')
706
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  input_image = HWC3(input_image)
708
- _, detected_map = apply_midas(resize_image(input_image,
709
- detect_resolution),
710
- bg_th=bg_threshold)
711
- detected_map = HWC3(detected_map)
712
- img = resize_image(input_image, image_resolution)
713
- H, W, C = img.shape
714
-
715
- detected_map = cv2.resize(detected_map, (W, H),
716
- interpolation=cv2.INTER_LINEAR)
717
-
718
- control = torch.from_numpy(
719
- detected_map[:, :, ::-1].copy()).float().cuda() / 255.0
720
- control = torch.stack([control for _ in range(num_samples)], dim=0)
721
- control = einops.rearrange(control, 'b h w c -> b c h w').clone()
722
-
723
- if seed == -1:
724
- seed = random.randint(0, 65535)
725
- seed_everything(seed)
726
-
727
- if config.save_memory:
728
- self.model.low_vram_shift(is_diffusing=False)
729
-
730
- cond = {
731
- 'c_concat': [control],
732
- 'c_crossattn': [
733
- self.model.get_learned_conditioning(
734
- [prompt + ', ' + a_prompt] * num_samples)
735
- ]
736
- }
737
- un_cond = {
738
- 'c_concat': [control],
739
- 'c_crossattn':
740
- [self.model.get_learned_conditioning([n_prompt] * num_samples)]
741
- }
742
- shape = (4, H // 8, W // 8)
743
-
744
- if config.save_memory:
745
- self.model.low_vram_shift(is_diffusing=True)
746
-
747
- samples, intermediates = self.ddim_sampler.sample(
748
- ddim_steps,
749
- num_samples,
750
- shape,
751
- cond,
752
- verbose=False,
753
- eta=eta,
754
- unconditional_guidance_scale=scale,
755
- unconditional_conditioning=un_cond)
756
-
757
- if config.save_memory:
758
- self.model.low_vram_shift(is_diffusing=False)
759
-
760
- x_samples = self.model.decode_first_stage(samples)
761
- x_samples = (
762
- einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
763
- 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
764
-
765
- results = [x_samples[i] for i in range(num_samples)]
766
- return [detected_map] + results
 
3
  from __future__ import annotations
4
 
5
  import pathlib
 
 
 
6
  import sys
7
 
8
  import cv2
 
9
  import numpy as np
10
+ import PIL.Image
11
  import torch
12
+ from diffusers import (ControlNetModel, DiffusionPipeline,
13
+ StableDiffusionControlNetPipeline,
14
+ UniPCMultistepScheduler)
15
 
16
+ repo_dir = pathlib.Path(__file__).parent
17
+ submodule_dir = repo_dir / 'ControlNet'
18
+ sys.path.append(submodule_dir.as_posix())
19
 
 
20
  from annotator.canny import apply_canny
21
  from annotator.hed import apply_hed, nms
22
  from annotator.midas import apply_midas
 
24
  from annotator.openpose import apply_openpose
25
  from annotator.uniformer import apply_uniformer
26
  from annotator.util import HWC3, resize_image
 
 
27
  from share import *
28
 
29
+ CONTROLNET_MODEL_IDS = {
30
+ 'canny': 'lllyasviel/sd-controlnet-canny',
31
+ 'hough': 'lllyasviel/sd-controlnet-mlsd',
32
+ 'hed': 'lllyasviel/sd-controlnet-hed',
33
+ 'scribble': 'lllyasviel/sd-controlnet-scribble',
34
+ 'pose': 'lllyasviel/sd-controlnet-openpose',
35
+ 'seg': 'lllyasviel/sd-controlnet-seg',
36
+ 'depth': 'lllyasviel/sd-controlnet-depth',
37
+ 'normal': 'lllyasviel/sd-controlnet-normal',
38
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  class Model:
42
+ def __init__(self):
43
+ # FIXME
44
+ self.base_model_id = 'andite/anything-v4.0'
45
+ self.task_name = 'pose'
46
+ self.pipe = self.load_pipe()
47
+
48
+ def load_pipe(self) -> DiffusionPipeline:
49
+ model_id = CONTROLNET_MODEL_IDS[self.task_name]
50
+ controlnet = ControlNetModel.from_pretrained(model_id,
51
+ torch_dtype=torch.float16)
52
+ pipe = StableDiffusionControlNetPipeline.from_pretrained(
53
+ self.base_model_id,
54
+ safety_checker=None,
55
+ controlnet=controlnet,
56
+ torch_dtype=torch.float16)
57
+ pipe.scheduler = UniPCMultistepScheduler.from_config(
58
+ pipe.scheduler.config)
59
+ pipe.enable_xformers_memory_efficient_attention()
60
+ pipe.enable_model_cpu_offload()
61
+ return pipe
62
+
63
+ def load_controlnet_weight(self, task_name: str) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  if task_name == self.task_name:
65
  return
66
+ model_id = CONTROLNET_MODEL_IDS[task_name]
67
+ controlnet = ControlNetModel.from_pretrained(model_id,
68
+ torch_dtype=torch.float16)
69
+ from accelerate import cpu_offload_with_hook
70
+ cpu_offload_with_hook(controlnet, torch.device('cuda:0'))
71
+ self.pipe.controlnet = controlnet
 
72
  self.task_name = task_name
73
 
74
+ def get_prompt(self, prompt: str, additional_prompt: str) -> str:
75
+ if not prompt:
76
+ prompt = additional_prompt
77
+ else:
78
+ prompt = f'{prompt}, {additional_prompt}'
79
+ return prompt
80
+
81
+ def run_pipe(
82
+ self,
83
+ prompt: str,
84
+ negative_prompt: str,
85
+ control_image: PIL.Image.Image,
86
+ num_images: int,
87
+ num_steps: int,
88
+ guidance_scale: float,
89
+ seed: int,
90
+ ):
91
+ generator = torch.Generator().manual_seed(seed)
92
+ return self.pipe(prompt=prompt,
93
+ negative_prompt=negative_prompt,
94
+ guidance_scale=guidance_scale,
95
+ num_images_per_prompt=num_images,
96
+ num_inference_steps=num_steps,
97
+ generator=generator,
98
+ image=control_image)
99
+
100
+ def process(
101
+ self,
102
+ task_name: str,
103
+ prompt: str,
104
+ additional_prompt: str,
105
+ negative_prompt: str,
106
+ control_image: PIL.Image.Image,
107
+ vis_control_image: PIL.Image.Image,
108
+ num_samples: int,
109
+ num_steps: int,
110
+ guidance_scale: float,
111
+ seed: int,
112
+ ):
113
+ self.load_controlnet_weight(task_name)
114
+ results = self.run_pipe(
115
+ prompt=self.get_prompt(prompt, additional_prompt),
116
+ negative_prompt=negative_prompt,
117
+ control_image=control_image,
118
+ num_images=num_samples,
119
+ num_steps=num_steps,
120
+ guidance_scale=guidance_scale,
121
+ seed=seed,
122
+ )
123
+ return [vis_control_image] + results.images
124
+
125
+ def preprocess_canny(
126
+ self,
127
+ input_image: np.ndarray,
128
+ image_resolution: int,
129
+ low_threshold: int,
130
+ high_threshold: int,
131
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
132
+ image = resize_image(HWC3(input_image), image_resolution)
133
+ control_image = apply_canny(image, low_threshold, high_threshold)
134
+ control_image = HWC3(control_image)
135
+ vis_control_image = 255 - control_image
136
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
137
+ vis_control_image)
138
 
139
  @torch.inference_mode()
140
+ def process_canny(
141
+ self,
142
+ input_image: np.ndarray,
143
+ prompt: str,
144
+ additional_prompt: str,
145
+ negative_prompt: str,
146
+ num_samples: int,
147
+ image_resolution: int,
148
+ num_steps: int,
149
+ guidance_scale: float,
150
+ seed: int,
151
+ low_threshold: int,
152
+ high_threshold: int,
153
+ ) -> list[PIL.Image.Image]:
154
+ control_image, vis_control_image = self.preprocess_canny(
155
+ input_image=input_image,
156
+ image_resolution=image_resolution,
157
+ low_threshold=low_threshold,
158
+ high_threshold=high_threshold,
159
+ )
160
+ return self.process(
161
+ task_name='canny',
162
+ prompt=prompt,
163
+ additional_prompt=additional_prompt,
164
+ negative_prompt=negative_prompt,
165
+ control_image=control_image,
166
+ vis_control_image=vis_control_image,
167
+ num_samples=num_samples,
168
+ num_steps=num_steps,
169
+ guidance_scale=guidance_scale,
170
+ seed=seed,
171
+ )
172
+
173
+ def preprocess_hough(
174
+ self,
175
+ input_image: np.ndarray,
176
+ image_resolution: int,
177
+ detect_resolution: int,
178
+ value_threshold: float,
179
+ distance_threshold: float,
180
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
181
+ input_image = HWC3(input_image)
182
+ control_image = apply_mlsd(
183
+ resize_image(input_image, detect_resolution), value_threshold,
184
+ distance_threshold)
185
+ control_image = HWC3(control_image)
186
+ image = resize_image(input_image, image_resolution)
187
+ H, W = image.shape[:2]
188
+ control_image = cv2.resize(control_image, (W, H),
189
+ interpolation=cv2.INTER_NEAREST)
190
 
191
+ vis_control_image = 255 - cv2.dilate(
192
+ control_image, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
193
 
194
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
195
+ vis_control_image)
196
 
197
  @torch.inference_mode()
198
+ def process_hough(
199
+ self,
200
+ input_image: np.ndarray,
201
+ prompt: str,
202
+ additional_prompt: str,
203
+ negative_prompt: str,
204
+ num_samples: int,
205
+ image_resolution: int,
206
+ detect_resolution: int,
207
+ num_steps: int,
208
+ guidance_scale: float,
209
+ seed: int,
210
+ value_threshold: float,
211
+ distance_threshold: float,
212
+ ) -> list[PIL.Image.Image]:
213
+ control_image, vis_control_image = self.preprocess_hough(
214
+ input_image=input_image,
215
+ image_resolution=image_resolution,
216
+ detect_resolution=detect_resolution,
217
+ value_threshold=value_threshold,
218
+ distance_threshold=distance_threshold,
219
+ )
220
+ return self.process(
221
+ task_name='hough',
222
+ prompt=prompt,
223
+ additional_prompt=additional_prompt,
224
+ negative_prompt=negative_prompt,
225
+ control_image=control_image,
226
+ vis_control_image=vis_control_image,
227
+ num_samples=num_samples,
228
+ num_steps=num_steps,
229
+ guidance_scale=guidance_scale,
230
+ seed=seed,
231
+ )
232
+
233
+ def preprocess_hed(
234
+ self,
235
+ input_image: np.ndarray,
236
+ image_resolution: int,
237
+ detect_resolution: int,
238
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
239
  input_image = HWC3(input_image)
240
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
241
+ control_image = HWC3(control_image)
242
+ image = resize_image(input_image, image_resolution)
243
+ H, W = image.shape[:2]
244
+ control_image = cv2.resize(control_image, (W, H),
245
+ interpolation=cv2.INTER_LINEAR)
246
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
247
+ control_image)
248
 
249
  @torch.inference_mode()
250
+ def process_hed(
251
+ self,
252
+ input_image: np.ndarray,
253
+ prompt: str,
254
+ additional_prompt: str,
255
+ negative_prompt: str,
256
+ num_samples: int,
257
+ image_resolution: int,
258
+ detect_resolution: int,
259
+ num_steps: int,
260
+ guidance_scale: float,
261
+ seed: int,
262
+ ) -> list[PIL.Image.Image]:
263
+ control_image, vis_control_image = self.preprocess_hed(
264
+ input_image=input_image,
265
+ image_resolution=image_resolution,
266
+ detect_resolution=detect_resolution,
267
+ )
268
+ return self.process(
269
+ task_name='hed',
270
+ prompt=prompt,
271
+ additional_prompt=additional_prompt,
272
+ negative_prompt=negative_prompt,
273
+ control_image=control_image,
274
+ vis_control_image=vis_control_image,
275
+ num_samples=num_samples,
276
+ num_steps=num_steps,
277
+ guidance_scale=guidance_scale,
278
+ seed=seed,
279
+ )
280
+
281
+ def preprocess_scribble(
282
+ self,
283
+ input_image: np.ndarray,
284
+ image_resolution: int,
285
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
286
+ image = resize_image(HWC3(input_image), image_resolution)
287
+ control_image = np.zeros_like(image, dtype=np.uint8)
288
+ control_image[np.min(image, axis=2) < 127] = 255
289
+ vis_control_image = 255 - control_image
290
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
291
+ vis_control_image)
292
 
293
  @torch.inference_mode()
294
+ def process_scribble(
295
+ self,
296
+ input_image: np.ndarray,
297
+ prompt: str,
298
+ additional_prompt: str,
299
+ negative_prompt: str,
300
+ num_samples: int,
301
+ image_resolution: int,
302
+ num_steps: int,
303
+ guidance_scale: float,
304
+ seed: int,
305
+ ) -> list[PIL.Image.Image]:
306
+ control_image, vis_control_image = self.preprocess_scribble(
307
+ input_image=input_image,
308
+ image_resolution=image_resolution,
309
+ )
310
+ return self.process(
311
+ task_name='scribble',
312
+ prompt=prompt,
313
+ additional_prompt=additional_prompt,
314
+ negative_prompt=negative_prompt,
315
+ control_image=control_image,
316
+ vis_control_image=vis_control_image,
317
+ num_samples=num_samples,
318
+ num_steps=num_steps,
319
+ guidance_scale=guidance_scale,
320
+ seed=seed,
321
+ )
322
+
323
+ def preprocess_scribble_interactive(
324
+ self,
325
+ input_image: np.ndarray,
326
+ image_resolution: int,
327
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
328
+ image = resize_image(HWC3(input_image['mask'][:, :, 0]),
329
+ image_resolution)
330
+ control_image = np.zeros_like(image, dtype=np.uint8)
331
+ control_image[np.min(image, axis=2) > 127] = 255
332
+ vis_control_image = 255 - control_image
333
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
334
+ vis_control_image)
335
 
336
  @torch.inference_mode()
337
+ def process_scribble_interactive(
338
+ self,
339
+ input_image: np.ndarray,
340
+ prompt: str,
341
+ additional_prompt: str,
342
+ negative_prompt: str,
343
+ num_samples: int,
344
+ image_resolution: int,
345
+ num_steps: int,
346
+ guidance_scale: float,
347
+ seed: int,
348
+ ) -> list[PIL.Image.Image]:
349
+ control_image, vis_control_image = self.preprocess_scribble_interactive(
350
+ input_image=input_image,
351
+ image_resolution=image_resolution,
352
+ )
353
+ return self.process(
354
+ task_name='scribble',
355
+ prompt=prompt,
356
+ additional_prompt=additional_prompt,
357
+ negative_prompt=negative_prompt,
358
+ control_image=control_image,
359
+ vis_control_image=vis_control_image,
360
+ num_samples=num_samples,
361
+ num_steps=num_steps,
362
+ guidance_scale=guidance_scale,
363
+ seed=seed,
364
+ )
365
+
366
+ def preprocess_fake_scribble(
367
+ self,
368
+ input_image: np.ndarray,
369
+ image_resolution: int,
370
+ detect_resolution: int,
371
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
372
  input_image = HWC3(input_image)
373
+ control_image = apply_hed(resize_image(input_image, detect_resolution))
374
+ control_image = HWC3(control_image)
375
+ image = resize_image(input_image, image_resolution)
376
+ H, W = image.shape[:2]
377
 
378
+ control_image = cv2.resize(control_image, (W, H),
379
+ interpolation=cv2.INTER_LINEAR)
380
+ control_image = nms(control_image, 127, 3.0)
381
+ control_image = cv2.GaussianBlur(control_image, (0, 0), 3.0)
382
+ control_image[control_image > 4] = 255
383
+ control_image[control_image < 255] = 0
384
+
385
+ vis_control_image = 255 - control_image
386
 
387
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
388
+ vis_control_image)
389
+
390
+ @torch.inference_mode()
391
+ def process_fake_scribble(
392
+ self,
393
+ input_image: np.ndarray,
394
+ prompt: str,
395
+ additional_prompt: str,
396
+ negative_prompt: str,
397
+ num_samples: int,
398
+ image_resolution: int,
399
+ detect_resolution: int,
400
+ num_steps: int,
401
+ guidance_scale: float,
402
+ seed: int,
403
+ ) -> list[PIL.Image.Image]:
404
+ control_image, vis_control_image = self.preprocess_fake_scribble(
405
+ input_image=input_image,
406
+ image_resolution=image_resolution,
407
+ detect_resolution=detect_resolution,
408
+ )
409
+ return self.process(
410
+ task_name='scribble',
411
+ prompt=prompt,
412
+ additional_prompt=additional_prompt,
413
+ negative_prompt=negative_prompt,
414
+ control_image=control_image,
415
+ vis_control_image=vis_control_image,
416
+ num_samples=num_samples,
417
+ num_steps=num_steps,
418
+ guidance_scale=guidance_scale,
419
+ seed=seed,
420
+ )
421
+
422
+ def preprocess_pose(
423
+ self,
424
+ input_image: np.ndarray,
425
+ image_resolution: int,
426
+ detect_resolution: int,
427
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
428
  input_image = HWC3(input_image)
429
+ control_image, _ = apply_openpose(
430
  resize_image(input_image, detect_resolution))
431
+ control_image = HWC3(control_image)
432
+ image = resize_image(input_image, image_resolution)
433
+ H, W = image.shape[:2]
434
 
435
+ control_image = cv2.resize(control_image, (W, H),
436
+ interpolation=cv2.INTER_NEAREST)
437
 
438
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
439
+ control_image)
440
+
441
+ @torch.inference_mode()
442
+ def process_pose(
443
+ self,
444
+ input_image: np.ndarray,
445
+ prompt: str,
446
+ additional_prompt: str,
447
+ negative_prompt: str,
448
+ num_samples: int,
449
+ image_resolution: int,
450
+ detect_resolution: int,
451
+ num_steps: int,
452
+ guidance_scale: float,
453
+ seed: int,
454
+ ) -> list[PIL.Image.Image]:
455
+ control_image, vis_control_image = self.preprocess_pose(
456
+ input_image=input_image,
457
+ image_resolution=image_resolution,
458
+ detect_resolution=detect_resolution,
459
+ )
460
+ return self.process(
461
+ task_name='pose',
462
+ prompt=prompt,
463
+ additional_prompt=additional_prompt,
464
+ negative_prompt=negative_prompt,
465
+ control_image=control_image,
466
+ vis_control_image=vis_control_image,
467
+ num_samples=num_samples,
468
+ num_steps=num_steps,
469
+ guidance_scale=guidance_scale,
470
+ seed=seed,
471
+ )
472
+
473
+ def preprocess_seg(
474
+ self,
475
+ input_image: np.ndarray,
476
+ image_resolution: int,
477
+ detect_resolution: int,
478
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
479
  input_image = HWC3(input_image)
480
+ control_image = apply_uniformer(
481
  resize_image(input_image, detect_resolution))
482
+ image = resize_image(input_image, image_resolution)
483
+ H, W = image.shape[:2]
484
+ control_image = cv2.resize(control_image, (W, H),
485
+ interpolation=cv2.INTER_NEAREST)
486
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
487
+ control_image)
488
 
489
  @torch.inference_mode()
490
+ def process_seg(
491
+ self,
492
+ input_image: np.ndarray,
493
+ prompt: str,
494
+ additional_prompt: str,
495
+ negative_prompt: str,
496
+ num_samples: int,
497
+ image_resolution: int,
498
+ detect_resolution: int,
499
+ num_steps: int,
500
+ guidance_scale: float,
501
+ seed: int,
502
+ ) -> list[PIL.Image.Image]:
503
+ control_image, vis_control_image = self.preprocess_seg(
504
+ input_image=input_image,
505
+ image_resolution=image_resolution,
506
+ detect_resolution=detect_resolution,
507
+ )
508
+ return self.process(
509
+ task_name='seg',
510
+ prompt=prompt,
511
+ additional_prompt=additional_prompt,
512
+ negative_prompt=negative_prompt,
513
+ control_image=control_image,
514
+ vis_control_image=vis_control_image,
515
+ num_samples=num_samples,
516
+ num_steps=num_steps,
517
+ guidance_scale=guidance_scale,
518
+ seed=seed,
519
+ )
520
+
521
+ def preprocess_depth(
522
+ self,
523
+ input_image: np.ndarray,
524
+ image_resolution: int,
525
+ detect_resolution: int,
526
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
527
  input_image = HWC3(input_image)
528
+ control_image, _ = apply_midas(
529
  resize_image(input_image, detect_resolution))
530
+ control_image = HWC3(control_image)
531
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
532
+ control_image)
533
 
534
  @torch.inference_mode()
535
+ def process_depth(
536
+ self,
537
+ input_image: np.ndarray,
538
+ prompt: str,
539
+ additional_prompt: str,
540
+ negative_prompt: str,
541
+ num_samples: int,
542
+ image_resolution: int,
543
+ detect_resolution: int,
544
+ num_steps: int,
545
+ guidance_scale: float,
546
+ seed: int,
547
+ ) -> list[PIL.Image.Image]:
548
+ control_image, vis_control_image = self.preprocess_depth(
549
+ input_image=input_image,
550
+ image_resolution=image_resolution,
551
+ detect_resolution=detect_resolution,
552
+ )
553
+ return self.process(
554
+ task_name='depth',
555
+ prompt=prompt,
556
+ additional_prompt=additional_prompt,
557
+ negative_prompt=negative_prompt,
558
+ control_image=control_image,
559
+ vis_control_image=vis_control_image,
560
+ num_samples=num_samples,
561
+ num_steps=num_steps,
562
+ guidance_scale=guidance_scale,
563
+ seed=seed,
564
+ )
565
+
566
+ def preprocess_normal(
567
+ self,
568
+ input_image: np.ndarray,
569
+ image_resolution: int,
570
+ detect_resolution: int,
571
+ bg_threshold,
572
+ ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
573
  input_image = HWC3(input_image)
574
+ _, control_image = apply_midas(resize_image(input_image,
575
+ detect_resolution),
576
+ bg_th=bg_threshold)
577
+ control_image = HWC3(control_image)
578
+ image = resize_image(input_image, image_resolution)
579
+ H, W = image.shape[:2]
580
+ control_image = cv2.resize(control_image, (W, H),
581
+ interpolation=cv2.INTER_LINEAR)
582
+ return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
583
+ control_image)
584
+
585
+ @torch.inference_mode()
586
+ def process_normal(
587
+ self,
588
+ input_image: np.ndarray,
589
+ prompt: str,
590
+ additional_prompt: str,
591
+ negative_prompt: str,
592
+ num_samples: int,
593
+ image_resolution: int,
594
+ detect_resolution: int,
595
+ num_steps: int,
596
+ guidance_scale: float,
597
+ seed: int,
598
+ bg_threshold,
599
+ ) -> list[PIL.Image.Image]:
600
+ control_image, vis_control_image = self.preprocess_normal(
601
+ input_image=input_image,
602
+ image_resolution=image_resolution,
603
+ detect_resolution=detect_resolution,
604
+ bg_threshold=bg_threshold,
605
+ )
606
+ return self.process(
607
+ task_name='normal',
608
+ prompt=prompt,
609
+ additional_prompt=additional_prompt,
610
+ negative_prompt=negative_prompt,
611
+ control_image=control_image,
612
+ vis_control_image=vis_control_image,
613
+ num_samples=num_samples,
614
+ num_steps=num_steps,
615
+ guidance_scale=guidance_scale,
616
+ seed=seed,
617
+ )
 
requirements.txt CHANGED
@@ -1,7 +1,9 @@
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
- gradio==3.18.0
5
  imageio==2.25.0
6
  imageio-ffmpeg==0.4.8
7
  kornia==0.6.9
 
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
+ git+https://github.com/huggingface/accelerate@78151f8
5
+ git+https://github.com/huggingface/diffusers@fa6d52d
6
+ gradio==3.20.0
7
  imageio==2.25.0
8
  imageio-ffmpeg==0.4.8
9
  kornia==0.6.9
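
The two new git requirements pin the diffusers and accelerate revisions that supply the ControlNet pipeline and the CPU-offload hook used in model.py above. As orientation, the diffusers-side setup those pins enable looks roughly like the sketch below; the base Stable Diffusion checkpoint and the canny ControlNet id are assumptions, not taken from this diff.

    # Sketch only: assumed model ids are runwayml/stable-diffusion-v1-5 and
    # lllyasviel/sd-controlnet-canny; the Space may load different checkpoints.
    import torch
    from diffusers import (ControlNetModel, StableDiffusionControlNetPipeline,
                           UniPCMultistepScheduler)

    controlnet = ControlNetModel.from_pretrained('lllyasviel/sd-controlnet-canny',
                                                 torch_dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        'runwayml/stable-diffusion-v1-5',
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=torch.float16)
    # Same post-construction steps as in the diff above.
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()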