keep seed slider randomize method out of the queue

#4
by fffiloni - opened
.pre-commit-config.yaml CHANGED
@@ -35,13 +35,3 @@ repos:
35
  hooks:
36
  - id: yapf
37
  args: ['--parallel', '--in-place']
38
- - repo: https://github.com/kynan/nbstripout
39
- rev: 0.6.0
40
- hooks:
41
- - id: nbstripout
42
- args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
43
- - repo: https://github.com/nbQA-dev/nbQA
44
- rev: 1.6.4
45
- hooks:
46
- - id: nbqa-isort
47
- - id: nbqa-yapf
 
35
  hooks:
36
  - id: yapf
37
  args: ['--parallel', '--in-place']
 
 
 
 
 
 
 
 
 
 
LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2023 hysts
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -4,12 +4,10 @@ emoji: 🌖
4
  colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 3.36.1
8
- python_version: 3.10.11
9
  app_file: app.py
10
  pinned: false
11
- license: mit
12
- suggested_hardware: t4-medium
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 3.18.0
8
+ python_version: 3.10.9
9
  app_file: app.py
10
  pinned: false
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -8,7 +8,6 @@ import shlex
8
  import subprocess
9
 
10
  import gradio as gr
11
- import torch
12
 
13
  if os.getenv('SYSTEM') == 'spaces':
14
  with open('patch') as f:
@@ -31,127 +30,59 @@ for name in names:
31
  continue
32
  subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
33
 
34
- from app_canny import create_demo as create_demo_canny
35
- from app_depth import create_demo as create_demo_depth
36
- from app_fake_scribble import create_demo as create_demo_fake_scribble
37
- from app_hed import create_demo as create_demo_hed
38
- from app_hough import create_demo as create_demo_hough
39
- from app_normal import create_demo as create_demo_normal
40
- from app_pose import create_demo as create_demo_pose
41
- from app_scribble import create_demo as create_demo_scribble
42
- from app_scribble_interactive import \
43
  create_demo as create_demo_scribble_interactive
44
- from app_seg import create_demo as create_demo_seg
45
- from model import Model, download_all_controlnet_weights
46
 
47
- DESCRIPTION = '''# [ControlNet v1.0](https://github.com/lllyasviel/ControlNet)
 
48
 
49
- <p class="note">New ControlNet v1.1 is available <a href="https://huggingface.co/spaces/hysts/ControlNet-v1-1">here</a>.</p>
50
- '''
51
-
52
- SPACE_ID = os.getenv('SPACE_ID')
53
- ALLOW_CHANGING_BASE_MODEL = SPACE_ID != 'hysts/ControlNet'
54
-
55
- if SPACE_ID is not None:
56
- DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
57
- if not torch.cuda.is_available():
58
- DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'
59
 
60
- if torch.cuda.is_available():
61
- if os.getenv('SYSTEM') == 'spaces':
62
- download_all_controlnet_weights()
63
-
64
- MAX_IMAGES = int(os.getenv('MAX_IMAGES', '3'))
65
- DEFAULT_NUM_IMAGES = min(MAX_IMAGES, int(os.getenv('DEFAULT_NUM_IMAGES', '1')))
 
 
66
 
67
- DEFAULT_MODEL_ID = os.getenv('DEFAULT_MODEL_ID',
68
- 'runwayml/stable-diffusion-v1-5')
69
- model = Model(base_model_id=DEFAULT_MODEL_ID, task_name='canny')
70
 
71
  with gr.Blocks(css='style.css') as demo:
72
  gr.Markdown(DESCRIPTION)
73
  with gr.Tabs():
74
  with gr.TabItem('Canny'):
75
- create_demo_canny(model.process_canny,
76
- max_images=MAX_IMAGES,
77
- default_num_images=DEFAULT_NUM_IMAGES)
78
  with gr.TabItem('Hough'):
79
- create_demo_hough(model.process_hough,
80
- max_images=MAX_IMAGES,
81
- default_num_images=DEFAULT_NUM_IMAGES)
82
  with gr.TabItem('HED'):
83
- create_demo_hed(model.process_hed,
84
- max_images=MAX_IMAGES,
85
- default_num_images=DEFAULT_NUM_IMAGES)
86
  with gr.TabItem('Scribble'):
87
- create_demo_scribble(model.process_scribble,
88
- max_images=MAX_IMAGES,
89
- default_num_images=DEFAULT_NUM_IMAGES)
90
  with gr.TabItem('Scribble Interactive'):
91
  create_demo_scribble_interactive(
92
- model.process_scribble_interactive,
93
- max_images=MAX_IMAGES,
94
- default_num_images=DEFAULT_NUM_IMAGES)
95
  with gr.TabItem('Fake Scribble'):
96
  create_demo_fake_scribble(model.process_fake_scribble,
97
- max_images=MAX_IMAGES,
98
- default_num_images=DEFAULT_NUM_IMAGES)
99
  with gr.TabItem('Pose'):
100
- create_demo_pose(model.process_pose,
101
- max_images=MAX_IMAGES,
102
- default_num_images=DEFAULT_NUM_IMAGES)
103
  with gr.TabItem('Segmentation'):
104
- create_demo_seg(model.process_seg,
105
- max_images=MAX_IMAGES,
106
- default_num_images=DEFAULT_NUM_IMAGES)
107
  with gr.TabItem('Depth'):
108
- create_demo_depth(model.process_depth,
109
- max_images=MAX_IMAGES,
110
- default_num_images=DEFAULT_NUM_IMAGES)
111
  with gr.TabItem('Normal map'):
112
- create_demo_normal(model.process_normal,
113
- max_images=MAX_IMAGES,
114
- default_num_images=DEFAULT_NUM_IMAGES)
115
-
116
- with gr.Accordion(label='Base model', open=False):
117
- with gr.Row():
118
- with gr.Column():
119
- current_base_model = gr.Text(label='Current base model')
120
- with gr.Column(scale=0.3):
121
- check_base_model_button = gr.Button('Check current base model')
122
- with gr.Row():
123
- with gr.Column():
124
- new_base_model_id = gr.Text(
125
- label='New base model',
126
- max_lines=1,
127
- placeholder='runwayml/stable-diffusion-v1-5',
128
- info=
129
- 'The base model must be compatible with Stable Diffusion v1.5.',
130
- interactive=ALLOW_CHANGING_BASE_MODEL)
131
- with gr.Column(scale=0.3):
132
- change_base_model_button = gr.Button(
133
- 'Change base model', interactive=ALLOW_CHANGING_BASE_MODEL)
134
- if not ALLOW_CHANGING_BASE_MODEL:
135
- gr.Markdown(
136
- '''The base model is not allowed to be changed in this Space so as not to slow down the demo, but it can be changed if you duplicate the Space. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'''
137
- )
138
-
139
- gr.Markdown('''### Related Spaces
140
-
141
- - [Space using Anything-v4.0 as base model](https://huggingface.co/spaces/hysts/ControlNet-with-Anything-v4)
142
- - https://huggingface.co/spaces/jonigata/PoseMaker2
143
- - https://huggingface.co/spaces/diffusers/controlnet-openpose
144
- - https://huggingface.co/spaces/diffusers/controlnet-canny
145
- ''')
146
-
147
- check_base_model_button.click(fn=lambda: model.base_model_id,
148
- outputs=current_base_model,
149
- queue=False)
150
- new_base_model_id.submit(fn=model.set_base_model,
151
- inputs=new_base_model_id,
152
- outputs=current_base_model)
153
- change_base_model_button.click(fn=model.set_base_model,
154
- inputs=new_base_model_id,
155
- outputs=current_base_model)
156
 
157
- demo.queue(api_open=False, max_size=10).launch()
 
8
  import subprocess
9
 
10
  import gradio as gr
 
11
 
12
  if os.getenv('SYSTEM') == 'spaces':
13
  with open('patch') as f:
 
30
  continue
31
  subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
32
 
33
+ from gradio_canny2image import create_demo as create_demo_canny
34
+ from gradio_depth2image import create_demo as create_demo_depth
35
+ from gradio_fake_scribble2image import create_demo as create_demo_fake_scribble
36
+ from gradio_hed2image import create_demo as create_demo_hed
37
+ from gradio_hough2image import create_demo as create_demo_hough
38
+ from gradio_normal2image import create_demo as create_demo_normal
39
+ from gradio_pose2image import create_demo as create_demo_pose
40
+ from gradio_scribble2image import create_demo as create_demo_scribble
41
+ from gradio_scribble2image_interactive import \
42
  create_demo as create_demo_scribble_interactive
43
+ from gradio_seg2image import create_demo as create_demo_seg
44
+ from model import Model
45
 
46
+ MAX_IMAGES = 1
47
+ DESCRIPTION = '''# ControlNet
48
 
49
+ This is an unofficial demo for [https://github.com/lllyasviel/ControlNet](https://github.com/lllyasviel/ControlNet).
 
 
 
 
 
 
 
 
 
50
 
51
+ If you are interested in trying out other base models, check out [this Space](https://huggingface.co/spaces/hysts/ControlNet-with-other-models) as well.
52
+ '''
53
+ if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
54
+ DESCRIPTION += f'''<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.<br/>
55
+ <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">
56
+ <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
57
+ <p/>
58
+ '''
59
 
60
+ model = Model()
 
 
61
 
62
  with gr.Blocks(css='style.css') as demo:
63
  gr.Markdown(DESCRIPTION)
64
  with gr.Tabs():
65
  with gr.TabItem('Canny'):
66
+ create_demo_canny(model.process_canny, max_images=MAX_IMAGES)
 
 
67
  with gr.TabItem('Hough'):
68
+ create_demo_hough(model.process_hough, max_images=MAX_IMAGES)
 
 
69
  with gr.TabItem('HED'):
70
+ create_demo_hed(model.process_hed, max_images=MAX_IMAGES)
 
 
71
  with gr.TabItem('Scribble'):
72
+ create_demo_scribble(model.process_scribble, max_images=MAX_IMAGES)
 
 
73
  with gr.TabItem('Scribble Interactive'):
74
  create_demo_scribble_interactive(
75
+ model.process_scribble_interactive, max_images=MAX_IMAGES)
 
 
76
  with gr.TabItem('Fake Scribble'):
77
  create_demo_fake_scribble(model.process_fake_scribble,
78
+ max_images=MAX_IMAGES)
 
79
  with gr.TabItem('Pose'):
80
+ create_demo_pose(model.process_pose, max_images=MAX_IMAGES)
 
 
81
  with gr.TabItem('Segmentation'):
82
+ create_demo_seg(model.process_seg, max_images=MAX_IMAGES)
 
 
83
  with gr.TabItem('Depth'):
84
+ create_demo_depth(model.process_depth, max_images=MAX_IMAGES)
 
 
85
  with gr.TabItem('Normal map'):
86
+ create_demo_normal(model.process_normal, max_images=MAX_IMAGES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ demo.queue(api_open=False).launch()
app_canny.py → gradio_canny2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
@@ -16,40 +16,40 @@ def create_demo(process, max_images=12, default_num_images=3):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
- maximum=512,
24
  value=512,
25
  step=256)
26
- canny_low_threshold = gr.Slider(
27
- label='Canny low threshold',
28
- minimum=1,
29
- maximum=255,
30
- value=100,
31
- step=1)
32
- canny_high_threshold = gr.Slider(
33
- label='Canny high threshold',
34
- minimum=1,
35
- maximum=255,
36
- value=200,
37
- step=1)
38
- num_steps = gr.Slider(label='Steps',
39
- minimum=1,
40
- maximum=100,
41
- value=20,
42
- step=1)
43
- guidance_scale = gr.Slider(label='Guidance Scale',
44
- minimum=0.1,
45
- maximum=30.0,
46
- value=9.0,
47
- step=0.1)
48
  seed = gr.Slider(label='Seed',
49
  minimum=-1,
50
  maximum=2147483647,
51
  step=1,
52
- randomize=True)
 
 
53
  a_prompt = gr.Textbox(
54
  label='Added Prompt',
55
  value='best quality, extremely detailed')
@@ -59,33 +59,17 @@ def create_demo(process, max_images=12, default_num_images=3):
59
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
  )
61
  with gr.Column():
62
- result = gr.Gallery(label='Output',
63
- show_label=False,
64
- elem_id='gallery').style(grid=2,
65
- height='auto')
66
- inputs = [
67
- input_image,
68
- prompt,
69
- a_prompt,
70
- n_prompt,
71
- num_samples,
72
- image_resolution,
73
- num_steps,
74
- guidance_scale,
75
- seed,
76
- canny_low_threshold,
77
- canny_high_threshold,
78
  ]
79
- prompt.submit(fn=process, inputs=inputs, outputs=result)
80
  run_button.click(fn=process,
81
- inputs=inputs,
82
- outputs=result,
83
  api_name='canny')
84
  return demo
85
-
86
-
87
- if __name__ == '__main__':
88
- from model import Model
89
- model = Model()
90
- demo = create_demo(model.process_canny)
91
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
+ low_threshold = gr.Slider(label='Canny low threshold',
27
+ minimum=1,
28
+ maximum=255,
29
+ value=100,
30
+ step=1)
31
+ high_threshold = gr.Slider(label='Canny high threshold',
32
+ minimum=1,
33
+ maximum=255,
34
+ value=200,
35
+ step=1)
36
+ ddim_steps = gr.Slider(label='Steps',
37
+ minimum=1,
38
+ maximum=100,
39
+ value=20,
40
+ step=1)
41
+ scale = gr.Slider(label='Guidance Scale',
42
+ minimum=0.1,
43
+ maximum=30.0,
44
+ value=9.0,
45
+ step=0.1)
 
 
46
  seed = gr.Slider(label='Seed',
47
  minimum=-1,
48
  maximum=2147483647,
49
  step=1,
50
+ randomize=True,
51
+ queue=False)
52
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
53
  a_prompt = gr.Textbox(
54
  label='Added Prompt',
55
  value='best quality, extremely detailed')
 
59
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
60
  )
61
  with gr.Column():
62
+ result_gallery = gr.Gallery(label='Output',
63
+ show_label=False,
64
+ elem_id='gallery').style(
65
+ grid=2, height='auto')
66
+ ips = [
67
+ input_image, prompt, a_prompt, n_prompt, num_samples,
68
+ image_resolution, ddim_steps, scale, seed, eta, low_threshold,
69
+ high_threshold
 
 
 
 
 
 
 
 
70
  ]
 
71
  run_button.click(fn=process,
72
+ inputs=ips,
73
+ outputs=[result_gallery],
74
  api_name='canny')
75
  return demo
 
 
 
 
 
 
 
app_depth.py → gradio_depth2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Depth Maps')
@@ -13,38 +13,38 @@ def create_demo(process, max_images=12, default_num_images=3):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
- is_depth_image = gr.Checkbox(label='Is depth image',
17
- value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
- value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
25
- maximum=512,
26
  value=512,
27
  step=256)
28
  detect_resolution = gr.Slider(label='Depth Resolution',
29
  minimum=128,
30
- maximum=512,
31
  value=384,
32
  step=1)
33
- num_steps = gr.Slider(label='Steps',
34
- minimum=1,
35
- maximum=100,
36
- value=20,
37
- step=1)
38
- guidance_scale = gr.Slider(label='Guidance Scale',
39
- minimum=0.1,
40
- maximum=30.0,
41
- value=9.0,
42
- step=0.1)
43
  seed = gr.Slider(label='Seed',
44
  minimum=-1,
45
  maximum=2147483647,
46
  step=1,
47
- randomize=True)
 
 
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
@@ -54,33 +54,16 @@ def create_demo(process, max_images=12, default_num_images=3):
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
- result = gr.Gallery(label='Output',
58
- show_label=False,
59
- elem_id='gallery').style(grid=2,
60
- height='auto')
61
- inputs = [
62
- input_image,
63
- prompt,
64
- a_prompt,
65
- n_prompt,
66
- num_samples,
67
- image_resolution,
68
- detect_resolution,
69
- num_steps,
70
- guidance_scale,
71
- seed,
72
- is_depth_image,
73
  ]
74
- prompt.submit(fn=process, inputs=inputs, outputs=result)
75
  run_button.click(fn=process,
76
- inputs=inputs,
77
- outputs=result,
78
  api_name='depth')
79
  return demo
80
-
81
-
82
- if __name__ == '__main__':
83
- from model import Model
84
- model = Model()
85
- demo = create_demo(model.process_depth)
86
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Depth Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='Depth Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=384,
30
  step=1)
31
+ ddim_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True,
46
+ queue=False)
47
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
 
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
+ result_gallery = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(
60
+ grid=2, height='auto')
61
+ ips = [
62
+ input_image, prompt, a_prompt, n_prompt, num_samples,
63
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
64
  ]
 
65
  run_button.click(fn=process,
66
+ inputs=ips,
67
+ outputs=[result_gallery],
68
  api_name='depth')
69
  return demo
 
 
 
 
 
 
 
app_fake_scribble.py → gradio_fake_scribble2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
@@ -16,33 +16,35 @@ def create_demo(process, max_images=12, default_num_images=3):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
- maximum=512,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='HED Resolution',
27
  minimum=128,
28
- maximum=512,
29
  value=512,
30
  step=1)
31
- num_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- guidance_scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
@@ -52,32 +54,16 @@ def create_demo(process, max_images=12, default_num_images=3):
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
- result = gr.Gallery(label='Output',
56
- show_label=False,
57
- elem_id='gallery').style(grid=2,
58
- height='auto')
59
- inputs = [
60
- input_image,
61
- prompt,
62
- a_prompt,
63
- n_prompt,
64
- num_samples,
65
- image_resolution,
66
- detect_resolution,
67
- num_steps,
68
- guidance_scale,
69
- seed,
70
  ]
71
- prompt.submit(fn=process, inputs=inputs, outputs=result)
72
  run_button.click(fn=process,
73
- inputs=inputs,
74
- outputs=result,
75
  api_name='fake_scribble')
76
  return demo
77
-
78
-
79
- if __name__ == '__main__':
80
- from model import Model
81
- model = Model()
82
- demo = create_demo(model.process_fake_scribble)
83
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='HED Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=512,
30
  step=1)
31
+ ddim_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True,
46
+ queue=False)
47
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
 
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
+ result_gallery = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(
60
+ grid=2, height='auto')
61
+ ips = [
62
+ input_image, prompt, a_prompt, n_prompt, num_samples,
63
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
 
65
  run_button.click(fn=process,
66
+ inputs=ips,
67
+ outputs=[result_gallery],
68
  api_name='fake_scribble')
69
  return demo
 
 
 
 
 
 
 
app_hed.py → gradio_hed2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with HED Maps')
@@ -16,33 +16,35 @@ def create_demo(process, max_images=12, default_num_images=3):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
- maximum=512,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='HED Resolution',
27
  minimum=128,
28
- maximum=512,
29
  value=512,
30
  step=1)
31
- num_steps = gr.Slider(label='Steps',
32
- minimum=1,
33
- maximum=100,
34
- value=20,
35
- step=1)
36
- guidance_scale = gr.Slider(label='Guidance Scale',
37
- minimum=0.1,
38
- maximum=30.0,
39
- value=9.0,
40
- step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
- randomize=True)
 
 
46
  a_prompt = gr.Textbox(
47
  label='Added Prompt',
48
  value='best quality, extremely detailed')
@@ -52,32 +54,16 @@ def create_demo(process, max_images=12, default_num_images=3):
52
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
53
  )
54
  with gr.Column():
55
- result = gr.Gallery(label='Output',
56
- show_label=False,
57
- elem_id='gallery').style(grid=2,
58
- height='auto')
59
- inputs = [
60
- input_image,
61
- prompt,
62
- a_prompt,
63
- n_prompt,
64
- num_samples,
65
- image_resolution,
66
- detect_resolution,
67
- num_steps,
68
- guidance_scale,
69
- seed,
70
  ]
71
- prompt.submit(fn=process, inputs=inputs, outputs=result)
72
  run_button.click(fn=process,
73
- inputs=inputs,
74
- outputs=result,
75
  api_name='hed')
76
  return demo
77
-
78
-
79
- if __name__ == '__main__':
80
- from model import Model
81
- model = Model()
82
- demo = create_demo(model.process_hed)
83
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with HED Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='HED Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=512,
30
  step=1)
31
+ ddim_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True,
46
+ queue=False)
47
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
 
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
+ result_gallery = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(
60
+ grid=2, height='auto')
61
+ ips = [
62
+ input_image, prompt, a_prompt, n_prompt, num_samples,
63
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
64
  ]
 
65
  run_button.click(fn=process,
66
+ inputs=ips,
67
+ outputs=[result_gallery],
68
  api_name='hed')
69
  return demo
 
 
 
 
 
 
 
app_hough.py → gradio_hough2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
@@ -16,45 +16,47 @@ def create_demo(process, max_images=12, default_num_images=3):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
- maximum=512,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='Hough Resolution',
27
  minimum=128,
28
- maximum=512,
29
  value=512,
30
  step=1)
31
- mlsd_value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
- mlsd_distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
- num_steps = gr.Slider(label='Steps',
44
- minimum=1,
45
- maximum=100,
46
- value=20,
47
- step=1)
48
- guidance_scale = gr.Slider(label='Guidance Scale',
49
- minimum=0.1,
50
- maximum=30.0,
51
- value=9.0,
52
- step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
- randomize=True)
 
 
58
  a_prompt = gr.Textbox(
59
  label='Added Prompt',
60
  value='best quality, extremely detailed')
@@ -64,34 +66,17 @@ def create_demo(process, max_images=12, default_num_images=3):
64
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
65
  )
66
  with gr.Column():
67
- result = gr.Gallery(label='Output',
68
- show_label=False,
69
- elem_id='gallery').style(grid=2,
70
- height='auto')
71
- inputs = [
72
- input_image,
73
- prompt,
74
- a_prompt,
75
- n_prompt,
76
- num_samples,
77
- image_resolution,
78
- detect_resolution,
79
- num_steps,
80
- guidance_scale,
81
- seed,
82
- mlsd_value_threshold,
83
- mlsd_distance_threshold,
84
  ]
85
- prompt.submit(fn=process, inputs=inputs, outputs=result)
86
  run_button.click(fn=process,
87
- inputs=inputs,
88
- outputs=result,
89
  api_name='hough')
90
  return demo
91
-
92
-
93
- if __name__ == '__main__':
94
- from model import Model
95
- model = Model()
96
- demo = create_demo(model.process_hough)
97
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='Hough Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=512,
30
  step=1)
31
+ value_threshold = gr.Slider(
32
  label='Hough value threshold (MLSD)',
33
  minimum=0.01,
34
  maximum=2.0,
35
  value=0.1,
36
  step=0.01)
37
+ distance_threshold = gr.Slider(
38
  label='Hough distance threshold (MLSD)',
39
  minimum=0.01,
40
  maximum=20.0,
41
  value=0.1,
42
  step=0.01)
43
+ ddim_steps = gr.Slider(label='Steps',
44
+ minimum=1,
45
+ maximum=100,
46
+ value=20,
47
+ step=1)
48
+ scale = gr.Slider(label='Guidance Scale',
49
+ minimum=0.1,
50
+ maximum=30.0,
51
+ value=9.0,
52
+ step=0.1)
53
  seed = gr.Slider(label='Seed',
54
  minimum=-1,
55
  maximum=2147483647,
56
  step=1,
57
+ randomize=True,
58
+ queue=False)
59
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
60
  a_prompt = gr.Textbox(
61
  label='Added Prompt',
62
  value='best quality, extremely detailed')
 
66
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
67
  )
68
  with gr.Column():
69
+ result_gallery = gr.Gallery(label='Output',
70
+ show_label=False,
71
+ elem_id='gallery').style(
72
+ grid=2, height='auto')
73
+ ips = [
74
+ input_image, prompt, a_prompt, n_prompt, num_samples,
75
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
76
+ value_threshold, distance_threshold
 
 
 
 
 
 
 
 
 
77
  ]
 
78
  run_button.click(fn=process,
79
+ inputs=ips,
80
+ outputs=[result_gallery],
81
  api_name='hough')
82
  return demo
 
 
 
 
 
 
 
app_normal.py → gradio_normal2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Normal Maps')
@@ -13,21 +13,19 @@ def create_demo(process, max_images=12, default_num_images=3):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
- is_normal_image = gr.Checkbox(label='Is normal image',
17
- value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
- value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
25
- maximum=512,
26
  value=512,
27
  step=256)
28
  detect_resolution = gr.Slider(label='Normal Resolution',
29
  minimum=128,
30
- maximum=512,
31
  value=384,
32
  step=1)
33
  bg_threshold = gr.Slider(
@@ -36,21 +34,23 @@ def create_demo(process, max_images=12, default_num_images=3):
36
  maximum=1.0,
37
  value=0.4,
38
  step=0.01)
39
- num_steps = gr.Slider(label='Steps',
40
- minimum=1,
41
- maximum=100,
42
- value=20,
43
- step=1)
44
- guidance_scale = gr.Slider(label='Guidance Scale',
45
- minimum=0.1,
46
- maximum=30.0,
47
- value=9.0,
48
- step=0.1)
49
  seed = gr.Slider(label='Seed',
50
  minimum=-1,
51
  maximum=2147483647,
52
  step=1,
53
- randomize=True)
 
 
54
  a_prompt = gr.Textbox(
55
  label='Added Prompt',
56
  value='best quality, extremely detailed')
@@ -60,34 +60,17 @@ def create_demo(process, max_images=12, default_num_images=3):
60
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
61
  )
62
  with gr.Column():
63
- result = gr.Gallery(label='Output',
64
- show_label=False,
65
- elem_id='gallery').style(grid=2,
66
- height='auto')
67
- inputs = [
68
- input_image,
69
- prompt,
70
- a_prompt,
71
- n_prompt,
72
- num_samples,
73
- image_resolution,
74
- detect_resolution,
75
- num_steps,
76
- guidance_scale,
77
- seed,
78
- bg_threshold,
79
- is_normal_image,
80
  ]
81
- prompt.submit(fn=process, inputs=inputs, outputs=result)
82
  run_button.click(fn=process,
83
- inputs=inputs,
84
- outputs=result,
85
  api_name='normal')
86
  return demo
87
-
88
-
89
- if __name__ == '__main__':
90
- from model import Model
91
- model = Model()
92
- demo = create_demo(model.process_normal)
93
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Normal Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='Normal Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=384,
30
  step=1)
31
  bg_threshold = gr.Slider(
 
34
  maximum=1.0,
35
  value=0.4,
36
  step=0.01)
37
+ ddim_steps = gr.Slider(label='Steps',
38
+ minimum=1,
39
+ maximum=100,
40
+ value=20,
41
+ step=1)
42
+ scale = gr.Slider(label='Guidance Scale',
43
+ minimum=0.1,
44
+ maximum=30.0,
45
+ value=9.0,
46
+ step=0.1)
47
  seed = gr.Slider(label='Seed',
48
  minimum=-1,
49
  maximum=2147483647,
50
  step=1,
51
+ randomize=True,
52
+ queue=False)
53
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
54
  a_prompt = gr.Textbox(
55
  label='Added Prompt',
56
  value='best quality, extremely detailed')
 
60
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
61
  )
62
  with gr.Column():
63
+ result_gallery = gr.Gallery(label='Output',
64
+ show_label=False,
65
+ elem_id='gallery').style(
66
+ grid=2, height='auto')
67
+ ips = [
68
+ input_image, prompt, a_prompt, n_prompt, num_samples,
69
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
70
+ bg_threshold
 
 
 
 
 
 
 
 
 
71
  ]
 
72
  run_button.click(fn=process,
73
+ inputs=ips,
74
+ outputs=[result_gallery],
75
  api_name='normal')
76
  return demo
 
 
 
 
 
 
 
app_pose.py → gradio_pose2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Human Pose')
@@ -13,41 +13,38 @@ def create_demo(process, max_images=12, default_num_images=3):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
- is_pose_image = gr.Checkbox(label='Is pose image',
17
- value=False)
18
- gr.Markdown(
19
- 'You can use [PoseMaker2](https://huggingface.co/spaces/jonigata/PoseMaker2) to create pose images.'
20
- )
21
  num_samples = gr.Slider(label='Images',
22
  minimum=1,
23
  maximum=max_images,
24
- value=default_num_images,
25
  step=1)
26
  image_resolution = gr.Slider(label='Image Resolution',
27
  minimum=256,
28
- maximum=512,
29
  value=512,
30
  step=256)
31
  detect_resolution = gr.Slider(label='OpenPose Resolution',
32
  minimum=128,
33
- maximum=512,
34
  value=512,
35
  step=1)
36
- num_steps = gr.Slider(label='Steps',
37
- minimum=1,
38
- maximum=100,
39
- value=20,
40
- step=1)
41
- guidance_scale = gr.Slider(label='Guidance Scale',
42
- minimum=0.1,
43
- maximum=30.0,
44
- value=9.0,
45
- step=0.1)
46
  seed = gr.Slider(label='Seed',
47
  minimum=-1,
48
  maximum=2147483647,
49
  step=1,
50
- randomize=True)
 
 
51
  a_prompt = gr.Textbox(
52
  label='Added Prompt',
53
  value='best quality, extremely detailed')
@@ -57,33 +54,16 @@ def create_demo(process, max_images=12, default_num_images=3):
57
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
58
  )
59
  with gr.Column():
60
- result = gr.Gallery(label='Output',
61
- show_label=False,
62
- elem_id='gallery').style(grid=2,
63
- height='auto')
64
- inputs = [
65
- input_image,
66
- prompt,
67
- a_prompt,
68
- n_prompt,
69
- num_samples,
70
- image_resolution,
71
- detect_resolution,
72
- num_steps,
73
- guidance_scale,
74
- seed,
75
- is_pose_image,
76
  ]
77
- prompt.submit(fn=process, inputs=inputs, outputs=result)
78
  run_button.click(fn=process,
79
- inputs=inputs,
80
- outputs=result,
81
  api_name='pose')
82
  return demo
83
-
84
-
85
- if __name__ == '__main__':
86
- from model import Model
87
- model = Model()
88
- demo = create_demo(model.process_pose)
89
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Human Pose')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
 
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(label='OpenPose Resolution',
27
  minimum=128,
28
+ maximum=1024,
29
  value=512,
30
  step=1)
31
+ ddim_steps = gr.Slider(label='Steps',
32
+ minimum=1,
33
+ maximum=100,
34
+ value=20,
35
+ step=1)
36
+ scale = gr.Slider(label='Guidance Scale',
37
+ minimum=0.1,
38
+ maximum=30.0,
39
+ value=9.0,
40
+ step=0.1)
41
  seed = gr.Slider(label='Seed',
42
  minimum=-1,
43
  maximum=2147483647,
44
  step=1,
45
+ randomize=True,
46
+ queue=False)
47
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
48
  a_prompt = gr.Textbox(
49
  label='Added Prompt',
50
  value='best quality, extremely detailed')
 
54
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
55
  )
56
  with gr.Column():
57
+ result_gallery = gr.Gallery(label='Output',
58
+ show_label=False,
59
+ elem_id='gallery').style(
60
+ grid=2, height='auto')
61
+ ips = [
62
+ input_image, prompt, a_prompt, n_prompt, num_samples,
63
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
64
  ]
 
65
  run_button.click(fn=process,
66
+ inputs=ips,
67
+ outputs=[result_gallery],
68
  api_name='pose')
69
  return demo
 
 
 
 
 
 
 
app_scribble.py → gradio_scribble2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Scribble Maps')
@@ -16,28 +16,30 @@ def create_demo(process, max_images=12, default_num_images=3):
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
- value=default_num_images,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
- maximum=512,
24
  value=512,
25
  step=256)
26
- num_steps = gr.Slider(label='Steps',
27
- minimum=1,
28
- maximum=100,
29
- value=20,
30
- step=1)
31
- guidance_scale = gr.Slider(label='Guidance Scale',
32
- minimum=0.1,
33
- maximum=30.0,
34
- value=9.0,
35
- step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
- randomize=True)
 
 
41
  a_prompt = gr.Textbox(
42
  label='Added Prompt',
43
  value='best quality, extremely detailed')
@@ -47,31 +49,16 @@ def create_demo(process, max_images=12, default_num_images=3):
47
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
48
  )
49
  with gr.Column():
50
- result = gr.Gallery(label='Output',
51
- show_label=False,
52
- elem_id='gallery').style(grid=2,
53
- height='auto')
54
- inputs = [
55
- input_image,
56
- prompt,
57
- a_prompt,
58
- n_prompt,
59
- num_samples,
60
- image_resolution,
61
- num_steps,
62
- guidance_scale,
63
- seed,
64
  ]
65
- prompt.submit(fn=process, inputs=inputs, outputs=result)
66
  run_button.click(fn=process,
67
- inputs=inputs,
68
- outputs=result,
69
  api_name='scribble')
70
  return demo
71
-
72
-
73
- if __name__ == '__main__':
74
- from model import Model
75
- model = Model()
76
- demo = create_demo(model.process_scribble)
77
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Scribble Maps')
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
+ ddim_steps = gr.Slider(label='Steps',
27
+ minimum=1,
28
+ maximum=100,
29
+ value=20,
30
+ step=1)
31
+ scale = gr.Slider(label='Guidance Scale',
32
+ minimum=0.1,
33
+ maximum=30.0,
34
+ value=9.0,
35
+ step=0.1)
36
  seed = gr.Slider(label='Seed',
37
  minimum=-1,
38
  maximum=2147483647,
39
  step=1,
40
+ randomize=True,
41
+ queue=False)
42
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
43
  a_prompt = gr.Textbox(
44
  label='Added Prompt',
45
  value='best quality, extremely detailed')
 
49
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
50
  )
51
  with gr.Column():
52
+ result_gallery = gr.Gallery(label='Output',
53
+ show_label=False,
54
+ elem_id='gallery').style(
55
+ grid=2, height='auto')
56
+ ips = [
57
+ input_image, prompt, a_prompt, n_prompt, num_samples,
58
+ image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
59
  ]
 
60
  run_button.click(fn=process,
61
+ inputs=ips,
62
+ outputs=[result_gallery],
63
  api_name='scribble')
64
  return demo
 
 
 
 
 
 
 
app_scribble_interactive.py → gradio_scribble2image_interactive.py RENAMED
@@ -8,7 +8,7 @@ def create_canvas(w, h):
8
  return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
9
 
10
 
11
- def create_demo(process, max_images=12, default_num_images=3):
12
  with gr.Blocks() as demo:
13
  with gr.Row():
14
  gr.Markdown(
@@ -17,12 +17,12 @@ def create_demo(process, max_images=12, default_num_images=3):
17
  with gr.Column():
18
  canvas_width = gr.Slider(label='Canvas Width',
19
  minimum=256,
20
- maximum=512,
21
  value=512,
22
  step=1)
23
  canvas_height = gr.Slider(label='Canvas Height',
24
  minimum=256,
25
- maximum=512,
26
  value=512,
27
  step=1)
28
  create_button = gr.Button(label='Start',
@@ -37,7 +37,7 @@ def create_demo(process, max_images=12, default_num_images=3):
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
- outputs=input_image,
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
@@ -45,28 +45,30 @@ def create_demo(process, max_images=12, default_num_images=3):
45
  num_samples = gr.Slider(label='Images',
46
  minimum=1,
47
  maximum=max_images,
48
- value=default_num_images,
49
  step=1)
50
  image_resolution = gr.Slider(label='Image Resolution',
51
  minimum=256,
52
- maximum=512,
53
  value=512,
54
  step=256)
55
- num_steps = gr.Slider(label='Steps',
56
- minimum=1,
57
- maximum=100,
58
- value=20,
59
- step=1)
60
- guidance_scale = gr.Slider(label='Guidance Scale',
61
- minimum=0.1,
62
- maximum=30.0,
63
- value=9.0,
64
- step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
- randomize=True)
 
 
70
  a_prompt = gr.Textbox(
71
  label='Added Prompt',
72
  value='best quality, extremely detailed')
@@ -76,28 +78,13 @@ def create_demo(process, max_images=12, default_num_images=3):
76
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
77
  )
78
  with gr.Column():
79
- result = gr.Gallery(label='Output',
80
- show_label=False,
81
- elem_id='gallery').style(grid=2,
82
- height='auto')
83
- inputs = [
84
- input_image,
85
- prompt,
86
- a_prompt,
87
- n_prompt,
88
- num_samples,
89
- image_resolution,
90
- num_steps,
91
- guidance_scale,
92
- seed,
93
  ]
94
- prompt.submit(fn=process, inputs=inputs, outputs=result)
95
- run_button.click(fn=process, inputs=inputs, outputs=result)
96
  return demo
97
-
98
-
99
- if __name__ == '__main__':
100
- from model import Model
101
- model = Model()
102
- demo = create_demo(model.process_scribble_interactive)
103
- demo.queue().launch()
 
8
  return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
9
 
10
 
11
+ def create_demo(process, max_images=12):
12
  with gr.Blocks() as demo:
13
  with gr.Row():
14
  gr.Markdown(
 
17
  with gr.Column():
18
  canvas_width = gr.Slider(label='Canvas Width',
19
  minimum=256,
20
+ maximum=1024,
21
  value=512,
22
  step=1)
23
  canvas_height = gr.Slider(label='Canvas Height',
24
  minimum=256,
25
+ maximum=1024,
26
  value=512,
27
  step=1)
28
  create_button = gr.Button(label='Start',
 
37
  )
38
  create_button.click(fn=create_canvas,
39
  inputs=[canvas_width, canvas_height],
40
+ outputs=[input_image],
41
  queue=False)
42
  prompt = gr.Textbox(label='Prompt')
43
  run_button = gr.Button(label='Run')
 
45
  num_samples = gr.Slider(label='Images',
46
  minimum=1,
47
  maximum=max_images,
48
+ value=1,
49
  step=1)
50
  image_resolution = gr.Slider(label='Image Resolution',
51
  minimum=256,
52
+ maximum=768,
53
  value=512,
54
  step=256)
55
+ ddim_steps = gr.Slider(label='Steps',
56
+ minimum=1,
57
+ maximum=100,
58
+ value=20,
59
+ step=1)
60
+ scale = gr.Slider(label='Guidance Scale',
61
+ minimum=0.1,
62
+ maximum=30.0,
63
+ value=9.0,
64
+ step=0.1)
65
  seed = gr.Slider(label='Seed',
66
  minimum=-1,
67
  maximum=2147483647,
68
  step=1,
69
+ randomize=True,
70
+ queue=False)
71
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
72
  a_prompt = gr.Textbox(
73
  label='Added Prompt',
74
  value='best quality, extremely detailed')
 
78
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
79
  )
80
  with gr.Column():
81
+ result_gallery = gr.Gallery(label='Output',
82
+ show_label=False,
83
+ elem_id='gallery').style(
84
+ grid=2, height='auto')
85
+ ips = [
86
+ input_image, prompt, a_prompt, n_prompt, num_samples,
87
+ image_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
88
  ]
89
+ run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
 
90
  return demo
 
 
 
 
 
 
 
app_seg.py → gradio_seg2image.py RENAMED
@@ -3,7 +3,7 @@
3
  import gradio as gr
4
 
5
 
6
- def create_demo(process, max_images=12, default_num_images=3):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
@@ -13,39 +13,39 @@ def create_demo(process, max_images=12, default_num_images=3):
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
16
- is_segmentation_map = gr.Checkbox(
17
- label='Is segmentation map', value=False)
18
  num_samples = gr.Slider(label='Images',
19
  minimum=1,
20
  maximum=max_images,
21
- value=default_num_images,
22
  step=1)
23
  image_resolution = gr.Slider(label='Image Resolution',
24
  minimum=256,
25
- maximum=512,
26
  value=512,
27
  step=256)
28
  detect_resolution = gr.Slider(
29
  label='Segmentation Resolution',
30
  minimum=128,
31
- maximum=512,
32
  value=512,
33
  step=1)
34
- num_steps = gr.Slider(label='Steps',
35
- minimum=1,
36
- maximum=100,
37
- value=20,
38
- step=1)
39
- guidance_scale = gr.Slider(label='Guidance Scale',
40
- minimum=0.1,
41
- maximum=30.0,
42
- value=9.0,
43
- step=0.1)
44
  seed = gr.Slider(label='Seed',
45
  minimum=-1,
46
  maximum=2147483647,
47
  step=1,
48
- randomize=True)
 
 
49
  a_prompt = gr.Textbox(
50
  label='Added Prompt',
51
  value='best quality, extremely detailed')
@@ -55,33 +55,16 @@ def create_demo(process, max_images=12, default_num_images=3):
55
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
56
  )
57
  with gr.Column():
58
- result = gr.Gallery(label='Output',
59
- show_label=False,
60
- elem_id='gallery').style(grid=2,
61
- height='auto')
62
- inputs = [
63
- input_image,
64
- prompt,
65
- a_prompt,
66
- n_prompt,
67
- num_samples,
68
- image_resolution,
69
- detect_resolution,
70
- num_steps,
71
- guidance_scale,
72
- seed,
73
- is_segmentation_map,
74
  ]
75
- prompt.submit(fn=process, inputs=inputs, outputs=result)
76
  run_button.click(fn=process,
77
- inputs=inputs,
78
- outputs=result,
79
  api_name='seg')
80
  return demo
81
-
82
-
83
- if __name__ == '__main__':
84
- from model import Model
85
- model = Model()
86
- demo = create_demo(model.process_seg)
87
- demo.queue().launch()
 
3
  import gradio as gr
4
 
5
 
6
+ def create_demo(process, max_images=12):
7
  with gr.Blocks() as demo:
8
  with gr.Row():
9
  gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
 
13
  prompt = gr.Textbox(label='Prompt')
14
  run_button = gr.Button(label='Run')
15
  with gr.Accordion('Advanced options', open=False):
 
 
16
  num_samples = gr.Slider(label='Images',
17
  minimum=1,
18
  maximum=max_images,
19
+ value=1,
20
  step=1)
21
  image_resolution = gr.Slider(label='Image Resolution',
22
  minimum=256,
23
+ maximum=768,
24
  value=512,
25
  step=256)
26
  detect_resolution = gr.Slider(
27
  label='Segmentation Resolution',
28
  minimum=128,
29
+ maximum=1024,
30
  value=512,
31
  step=1)
32
+ ddim_steps = gr.Slider(label='Steps',
33
+ minimum=1,
34
+ maximum=100,
35
+ value=20,
36
+ step=1)
37
+ scale = gr.Slider(label='Guidance Scale',
38
+ minimum=0.1,
39
+ maximum=30.0,
40
+ value=9.0,
41
+ step=0.1)
42
  seed = gr.Slider(label='Seed',
43
  minimum=-1,
44
  maximum=2147483647,
45
  step=1,
46
+ randomize=True,
47
+ queue=False)
48
+ eta = gr.Number(label='eta (DDIM)', value=0.0)
49
  a_prompt = gr.Textbox(
50
  label='Added Prompt',
51
  value='best quality, extremely detailed')
 
55
  'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
56
  )
57
  with gr.Column():
58
+ result_gallery = gr.Gallery(label='Output',
59
+ show_label=False,
60
+ elem_id='gallery').style(
61
+ grid=2, height='auto')
62
+ ips = [
63
+ input_image, prompt, a_prompt, n_prompt, num_samples,
64
+ image_resolution, detect_resolution, ddim_steps, scale, seed, eta
 
 
 
 
 
 
 
 
 
65
  ]
 
66
  run_button.click(fn=process,
67
+ inputs=ips,
68
+ outputs=[result_gallery],
69
  api_name='seg')
70
  return demo
 
 
 
 
 
 
 
model.py CHANGED
@@ -2,648 +2,727 @@
2
  # The original license file is LICENSE.ControlNet in this repo.
3
  from __future__ import annotations
4
 
5
- import gc
6
  import pathlib
 
 
 
7
  import sys
8
 
9
  import cv2
 
10
  import numpy as np
11
- import PIL.Image
12
  import torch
13
- from diffusers import (ControlNetModel, DiffusionPipeline,
14
- StableDiffusionControlNetPipeline,
15
- UniPCMultistepScheduler)
16
-
17
- repo_dir = pathlib.Path(__file__).parent
18
- submodule_dir = repo_dir / 'ControlNet'
19
- sys.path.append(submodule_dir.as_posix())
20
-
21
- try:
22
- from annotator.canny import apply_canny
23
- from annotator.hed import apply_hed, nms
24
- from annotator.midas import apply_midas
25
- from annotator.mlsd import apply_mlsd
26
- from annotator.openpose import apply_openpose
27
- from annotator.uniformer import apply_uniformer
28
- from annotator.util import HWC3, resize_image
29
- except Exception:
30
- pass
31
-
32
- CONTROLNET_MODEL_IDS = {
33
- 'canny': 'lllyasviel/sd-controlnet-canny',
34
- 'hough': 'lllyasviel/sd-controlnet-mlsd',
35
- 'hed': 'lllyasviel/sd-controlnet-hed',
36
- 'scribble': 'lllyasviel/sd-controlnet-scribble',
37
- 'pose': 'lllyasviel/sd-controlnet-openpose',
38
- 'seg': 'lllyasviel/sd-controlnet-seg',
39
- 'depth': 'lllyasviel/sd-controlnet-depth',
40
- 'normal': 'lllyasviel/sd-controlnet-normal',
41
  }
42
-
43
-
44
- def download_all_controlnet_weights() -> None:
45
- for model_id in CONTROLNET_MODEL_IDS.values():
46
- ControlNetModel.from_pretrained(model_id)
47
 
48
 
49
  class Model:
50
  def __init__(self,
51
- base_model_id: str = 'runwayml/stable-diffusion-v1-5',
52
- task_name: str = 'canny'):
53
  self.device = torch.device(
54
  'cuda:0' if torch.cuda.is_available() else 'cpu')
55
- self.base_model_id = ''
 
56
  self.task_name = ''
57
- self.pipe = self.load_pipe(base_model_id, task_name)
58
-
59
- def load_pipe(self, base_model_id: str, task_name) -> DiffusionPipeline:
60
- if self.device.type == 'cpu':
61
- return None
62
- if base_model_id == self.base_model_id and task_name == self.task_name and hasattr(
63
- self, 'pipe'):
64
- return self.pipe
65
- model_id = CONTROLNET_MODEL_IDS[task_name]
66
- controlnet = ControlNetModel.from_pretrained(model_id,
67
- torch_dtype=torch.float16)
68
- pipe = StableDiffusionControlNetPipeline.from_pretrained(
69
- base_model_id,
70
- safety_checker=None,
71
- controlnet=controlnet,
72
- torch_dtype=torch.float16)
73
- pipe.scheduler = UniPCMultistepScheduler.from_config(
74
- pipe.scheduler.config)
75
- pipe.enable_xformers_memory_efficient_attention()
76
- pipe.to(self.device)
77
- torch.cuda.empty_cache()
78
- gc.collect()
79
- self.base_model_id = base_model_id
80
- self.task_name = task_name
81
- return pipe
82
-
83
- def set_base_model(self, base_model_id: str) -> str:
84
- if not base_model_id or base_model_id == self.base_model_id:
85
- return self.base_model_id
86
- del self.pipe
87
- torch.cuda.empty_cache()
88
- gc.collect()
89
- try:
90
- self.pipe = self.load_pipe(base_model_id, self.task_name)
91
- except Exception:
92
- self.pipe = self.load_pipe(self.base_model_id, self.task_name)
93
- return self.base_model_id
94
-
95
- def load_controlnet_weight(self, task_name: str) -> None:
96
  if task_name == self.task_name:
97
  return
98
- if 'controlnet' in self.pipe.__dict__:
99
- del self.pipe.controlnet
100
- torch.cuda.empty_cache()
101
- gc.collect()
102
- model_id = CONTROLNET_MODEL_IDS[task_name]
103
- controlnet = ControlNetModel.from_pretrained(model_id,
104
- torch_dtype=torch.float16)
105
- controlnet.to(self.device)
106
- torch.cuda.empty_cache()
107
- gc.collect()
108
- self.pipe.controlnet = controlnet
109
  self.task_name = task_name
110
 
111
- def get_prompt(self, prompt: str, additional_prompt: str) -> str:
112
- if not prompt:
113
- prompt = additional_prompt
114
- else:
115
- prompt = f'{prompt}, {additional_prompt}'
116
- return prompt
117
-
118
- @torch.autocast('cuda')
119
- def run_pipe(
120
- self,
121
- prompt: str,
122
- negative_prompt: str,
123
- control_image: PIL.Image.Image,
124
- num_images: int,
125
- num_steps: int,
126
- guidance_scale: float,
127
- seed: int,
128
- ) -> list[PIL.Image.Image]:
 
 
 
 
 
 
 
 
 
 
 
 
129
  if seed == -1:
130
- seed = np.random.randint(0, np.iinfo(np.int64).max)
131
- generator = torch.Generator().manual_seed(seed)
132
- return self.pipe(prompt=prompt,
133
- negative_prompt=negative_prompt,
134
- guidance_scale=guidance_scale,
135
- num_images_per_prompt=num_images,
136
- num_inference_steps=num_steps,
137
- generator=generator,
138
- image=control_image).images
139
-
140
- @staticmethod
141
- def preprocess_canny(
142
- input_image: np.ndarray,
143
- image_resolution: int,
144
- low_threshold: int,
145
- high_threshold: int,
146
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
147
- image = resize_image(HWC3(input_image), image_resolution)
148
- control_image = apply_canny(image, low_threshold, high_threshold)
149
- control_image = HWC3(control_image)
150
- vis_control_image = 255 - control_image
151
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
152
- vis_control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  @torch.inference_mode()
155
- def process_canny(
156
- self,
157
- input_image: np.ndarray,
158
- prompt: str,
159
- additional_prompt: str,
160
- negative_prompt: str,
161
- num_images: int,
162
- image_resolution: int,
163
- num_steps: int,
164
- guidance_scale: float,
165
- seed: int,
166
- low_threshold: int,
167
- high_threshold: int,
168
- ) -> list[PIL.Image.Image]:
169
- control_image, vis_control_image = self.preprocess_canny(
170
- input_image=input_image,
171
- image_resolution=image_resolution,
172
- low_threshold=low_threshold,
173
- high_threshold=high_threshold,
174
- )
175
- self.load_controlnet_weight('canny')
176
- results = self.run_pipe(
177
- prompt=self.get_prompt(prompt, additional_prompt),
178
- negative_prompt=negative_prompt,
179
- control_image=control_image,
180
- num_images=num_images,
181
- num_steps=num_steps,
182
- guidance_scale=guidance_scale,
183
- seed=seed,
184
- )
185
- return [vis_control_image] + results
186
-
187
- @staticmethod
188
- def preprocess_hough(
189
- input_image: np.ndarray,
190
- image_resolution: int,
191
- detect_resolution: int,
192
- value_threshold: float,
193
- distance_threshold: float,
194
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
195
  input_image = HWC3(input_image)
196
- control_image = apply_mlsd(
197
- resize_image(input_image, detect_resolution), value_threshold,
198
- distance_threshold)
199
- control_image = HWC3(control_image)
200
- image = resize_image(input_image, image_resolution)
201
- H, W = image.shape[:2]
202
- control_image = cv2.resize(control_image, (W, H),
203
- interpolation=cv2.INTER_NEAREST)
204
 
205
- vis_control_image = 255 - cv2.dilate(
206
- control_image, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
207
 
208
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
209
- vis_control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  @torch.inference_mode()
212
- def process_hough(
213
- self,
214
- input_image: np.ndarray,
215
- prompt: str,
216
- additional_prompt: str,
217
- negative_prompt: str,
218
- num_images: int,
219
- image_resolution: int,
220
- detect_resolution: int,
221
- num_steps: int,
222
- guidance_scale: float,
223
- seed: int,
224
- value_threshold: float,
225
- distance_threshold: float,
226
- ) -> list[PIL.Image.Image]:
227
- control_image, vis_control_image = self.preprocess_hough(
228
- input_image=input_image,
229
- image_resolution=image_resolution,
230
- detect_resolution=detect_resolution,
231
- value_threshold=value_threshold,
232
- distance_threshold=distance_threshold,
233
- )
234
- self.load_controlnet_weight('hough')
235
- results = self.run_pipe(
236
- prompt=self.get_prompt(prompt, additional_prompt),
237
- negative_prompt=negative_prompt,
238
- control_image=control_image,
239
- num_images=num_images,
240
- num_steps=num_steps,
241
- guidance_scale=guidance_scale,
242
- seed=seed,
243
- )
244
- return [vis_control_image] + results
245
-
246
- @staticmethod
247
- def preprocess_hed(
248
- input_image: np.ndarray,
249
- image_resolution: int,
250
- detect_resolution: int,
251
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
252
  input_image = HWC3(input_image)
253
- control_image = apply_hed(resize_image(input_image, detect_resolution))
254
- control_image = HWC3(control_image)
255
- image = resize_image(input_image, image_resolution)
256
- H, W = image.shape[:2]
257
- control_image = cv2.resize(control_image, (W, H),
258
- interpolation=cv2.INTER_LINEAR)
259
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
260
- control_image)
261
 
262
- @torch.inference_mode()
263
- def process_hed(
264
- self,
265
- input_image: np.ndarray,
266
- prompt: str,
267
- additional_prompt: str,
268
- negative_prompt: str,
269
- num_images: int,
270
- image_resolution: int,
271
- detect_resolution: int,
272
- num_steps: int,
273
- guidance_scale: float,
274
- seed: int,
275
- ) -> list[PIL.Image.Image]:
276
- control_image, vis_control_image = self.preprocess_hed(
277
- input_image=input_image,
278
- image_resolution=image_resolution,
279
- detect_resolution=detect_resolution,
280
- )
281
- self.load_controlnet_weight('hed')
282
- results = self.run_pipe(
283
- prompt=self.get_prompt(prompt, additional_prompt),
284
- negative_prompt=negative_prompt,
285
- control_image=control_image,
286
- num_images=num_images,
287
- num_steps=num_steps,
288
- guidance_scale=guidance_scale,
289
- seed=seed,
290
- )
291
- return [vis_control_image] + results
292
-
293
- @staticmethod
294
- def preprocess_scribble(
295
- input_image: np.ndarray,
296
- image_resolution: int,
297
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
298
- image = resize_image(HWC3(input_image), image_resolution)
299
- control_image = np.zeros_like(image, dtype=np.uint8)
300
- control_image[np.min(image, axis=2) < 127] = 255
301
- vis_control_image = 255 - control_image
302
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
303
- vis_control_image)
 
 
 
 
 
 
 
 
 
304
 
305
  @torch.inference_mode()
306
- def process_scribble(
307
- self,
308
- input_image: np.ndarray,
309
- prompt: str,
310
- additional_prompt: str,
311
- negative_prompt: str,
312
- num_images: int,
313
- image_resolution: int,
314
- num_steps: int,
315
- guidance_scale: float,
316
- seed: int,
317
- ) -> list[PIL.Image.Image]:
318
- control_image, vis_control_image = self.preprocess_scribble(
319
- input_image=input_image,
320
- image_resolution=image_resolution,
321
- )
322
- self.load_controlnet_weight('scribble')
323
- results = self.run_pipe(
324
- prompt=self.get_prompt(prompt, additional_prompt),
325
- negative_prompt=negative_prompt,
326
- control_image=control_image,
327
- num_images=num_images,
328
- num_steps=num_steps,
329
- guidance_scale=guidance_scale,
330
- seed=seed,
331
- )
332
- return [vis_control_image] + results
333
-
334
- @staticmethod
335
- def preprocess_scribble_interactive(
336
- input_image: np.ndarray,
337
- image_resolution: int,
338
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
339
- image = resize_image(HWC3(input_image['mask'][:, :, 0]),
340
- image_resolution)
341
- control_image = np.zeros_like(image, dtype=np.uint8)
342
- control_image[np.min(image, axis=2) > 127] = 255
343
- vis_control_image = 255 - control_image
344
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
345
- vis_control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  @torch.inference_mode()
348
- def process_scribble_interactive(
349
- self,
350
- input_image: np.ndarray,
351
- prompt: str,
352
- additional_prompt: str,
353
- negative_prompt: str,
354
- num_images: int,
355
- image_resolution: int,
356
- num_steps: int,
357
- guidance_scale: float,
358
- seed: int,
359
- ) -> list[PIL.Image.Image]:
360
- control_image, vis_control_image = self.preprocess_scribble_interactive(
361
- input_image=input_image,
362
- image_resolution=image_resolution,
363
- )
364
- self.load_controlnet_weight('scribble')
365
- results = self.run_pipe(
366
- prompt=self.get_prompt(prompt, additional_prompt),
367
- negative_prompt=negative_prompt,
368
- control_image=control_image,
369
- num_images=num_images,
370
- num_steps=num_steps,
371
- guidance_scale=guidance_scale,
372
- seed=seed,
373
- )
374
- return [vis_control_image] + results
375
-
376
- @staticmethod
377
- def preprocess_fake_scribble(
378
- input_image: np.ndarray,
379
- image_resolution: int,
380
- detect_resolution: int,
381
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
382
- input_image = HWC3(input_image)
383
- control_image = apply_hed(resize_image(input_image, detect_resolution))
384
- control_image = HWC3(control_image)
385
- image = resize_image(input_image, image_resolution)
386
- H, W = image.shape[:2]
387
 
388
- control_image = cv2.resize(control_image, (W, H),
389
- interpolation=cv2.INTER_LINEAR)
390
- control_image = nms(control_image, 127, 3.0)
391
- control_image = cv2.GaussianBlur(control_image, (0, 0), 3.0)
392
- control_image[control_image > 4] = 255
393
- control_image[control_image < 255] = 0
394
 
395
- vis_control_image = 255 - control_image
 
396
 
397
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
398
- vis_control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  @torch.inference_mode()
401
- def process_fake_scribble(
402
- self,
403
- input_image: np.ndarray,
404
- prompt: str,
405
- additional_prompt: str,
406
- negative_prompt: str,
407
- num_images: int,
408
- image_resolution: int,
409
- detect_resolution: int,
410
- num_steps: int,
411
- guidance_scale: float,
412
- seed: int,
413
- ) -> list[PIL.Image.Image]:
414
- control_image, vis_control_image = self.preprocess_fake_scribble(
415
- input_image=input_image,
416
- image_resolution=image_resolution,
417
- detect_resolution=detect_resolution,
418
- )
419
- self.load_controlnet_weight('scribble')
420
- results = self.run_pipe(
421
- prompt=self.get_prompt(prompt, additional_prompt),
422
- negative_prompt=negative_prompt,
423
- control_image=control_image,
424
- num_images=num_images,
425
- num_steps=num_steps,
426
- guidance_scale=guidance_scale,
427
- seed=seed,
428
- )
429
- return [vis_control_image] + results
430
-
431
- @staticmethod
432
- def preprocess_pose(
433
- input_image: np.ndarray,
434
- image_resolution: int,
435
- detect_resolution: int,
436
- is_pose_image: bool,
437
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
438
  input_image = HWC3(input_image)
439
- if not is_pose_image:
440
- control_image, _ = apply_openpose(
441
- resize_image(input_image, detect_resolution))
442
- control_image = HWC3(control_image)
443
- image = resize_image(input_image, image_resolution)
444
- H, W = image.shape[:2]
445
- control_image = cv2.resize(control_image, (W, H),
446
- interpolation=cv2.INTER_NEAREST)
447
- else:
448
- control_image = resize_image(input_image, image_resolution)
449
-
450
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
451
- control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
  @torch.inference_mode()
454
- def process_pose(
455
- self,
456
- input_image: np.ndarray,
457
- prompt: str,
458
- additional_prompt: str,
459
- negative_prompt: str,
460
- num_images: int,
461
- image_resolution: int,
462
- detect_resolution: int,
463
- num_steps: int,
464
- guidance_scale: float,
465
- seed: int,
466
- is_pose_image: bool,
467
- ) -> list[PIL.Image.Image]:
468
- control_image, vis_control_image = self.preprocess_pose(
469
- input_image=input_image,
470
- image_resolution=image_resolution,
471
- detect_resolution=detect_resolution,
472
- is_pose_image=is_pose_image,
473
- )
474
- self.load_controlnet_weight('pose')
475
- results = self.run_pipe(
476
- prompt=self.get_prompt(prompt, additional_prompt),
477
- negative_prompt=negative_prompt,
478
- control_image=control_image,
479
- num_images=num_images,
480
- num_steps=num_steps,
481
- guidance_scale=guidance_scale,
482
- seed=seed,
483
- )
484
- return [vis_control_image] + results
485
-
486
- @staticmethod
487
- def preprocess_seg(
488
- input_image: np.ndarray,
489
- image_resolution: int,
490
- detect_resolution: int,
491
- is_segmentation_map: bool,
492
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
493
  input_image = HWC3(input_image)
494
- if not is_segmentation_map:
495
- control_image = apply_uniformer(
496
- resize_image(input_image, detect_resolution))
497
- image = resize_image(input_image, image_resolution)
498
- H, W = image.shape[:2]
499
- control_image = cv2.resize(control_image, (W, H),
500
- interpolation=cv2.INTER_NEAREST)
501
- else:
502
- control_image = resize_image(input_image, image_resolution)
503
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
504
- control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  @torch.inference_mode()
507
- def process_seg(
508
- self,
509
- input_image: np.ndarray,
510
- prompt: str,
511
- additional_prompt: str,
512
- negative_prompt: str,
513
- num_images: int,
514
- image_resolution: int,
515
- detect_resolution: int,
516
- num_steps: int,
517
- guidance_scale: float,
518
- seed: int,
519
- is_segmentation_map: bool,
520
- ) -> list[PIL.Image.Image]:
521
- control_image, vis_control_image = self.preprocess_seg(
522
- input_image=input_image,
523
- image_resolution=image_resolution,
524
- detect_resolution=detect_resolution,
525
- is_segmentation_map=is_segmentation_map,
526
- )
527
- self.load_controlnet_weight('seg')
528
- results = self.run_pipe(
529
- prompt=self.get_prompt(prompt, additional_prompt),
530
- negative_prompt=negative_prompt,
531
- control_image=control_image,
532
- num_images=num_images,
533
- num_steps=num_steps,
534
- guidance_scale=guidance_scale,
535
- seed=seed,
536
- )
537
- return [vis_control_image] + results
538
-
539
- @staticmethod
540
- def preprocess_depth(
541
- input_image: np.ndarray,
542
- image_resolution: int,
543
- detect_resolution: int,
544
- is_depth_image: bool,
545
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
546
  input_image = HWC3(input_image)
547
- if not is_depth_image:
548
- control_image, _ = apply_midas(
549
- resize_image(input_image, detect_resolution))
550
- control_image = HWC3(control_image)
551
- image = resize_image(input_image, image_resolution)
552
- H, W = image.shape[:2]
553
- control_image = cv2.resize(control_image, (W, H),
554
- interpolation=cv2.INTER_LINEAR)
555
- else:
556
- control_image = resize_image(input_image, image_resolution)
557
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
558
- control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  @torch.inference_mode()
561
- def process_depth(
562
- self,
563
- input_image: np.ndarray,
564
- prompt: str,
565
- additional_prompt: str,
566
- negative_prompt: str,
567
- num_images: int,
568
- image_resolution: int,
569
- detect_resolution: int,
570
- num_steps: int,
571
- guidance_scale: float,
572
- seed: int,
573
- is_depth_image: bool,
574
- ) -> list[PIL.Image.Image]:
575
- control_image, vis_control_image = self.preprocess_depth(
576
- input_image=input_image,
577
- image_resolution=image_resolution,
578
- detect_resolution=detect_resolution,
579
- is_depth_image=is_depth_image,
580
- )
581
- self.load_controlnet_weight('depth')
582
- results = self.run_pipe(
583
- prompt=self.get_prompt(prompt, additional_prompt),
584
- negative_prompt=negative_prompt,
585
- control_image=control_image,
586
- num_images=num_images,
587
- num_steps=num_steps,
588
- guidance_scale=guidance_scale,
589
- seed=seed,
590
- )
591
- return [vis_control_image] + results
592
-
593
- @staticmethod
594
- def preprocess_normal(
595
- input_image: np.ndarray,
596
- image_resolution: int,
597
- detect_resolution: int,
598
- bg_threshold: float,
599
- is_normal_image: bool,
600
- ) -> tuple[PIL.Image.Image, PIL.Image.Image]:
601
  input_image = HWC3(input_image)
602
- if not is_normal_image:
603
- _, control_image = apply_midas(resize_image(
604
- input_image, detect_resolution),
605
- bg_th=bg_threshold)
606
- control_image = HWC3(control_image)
607
- image = resize_image(input_image, image_resolution)
608
- H, W = image.shape[:2]
609
- control_image = cv2.resize(control_image, (W, H),
610
- interpolation=cv2.INTER_LINEAR)
611
- else:
612
- control_image = resize_image(input_image, image_resolution)
613
- return PIL.Image.fromarray(control_image), PIL.Image.fromarray(
614
- control_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
 
616
  @torch.inference_mode()
617
- def process_normal(
618
- self,
619
- input_image: np.ndarray,
620
- prompt: str,
621
- additional_prompt: str,
622
- negative_prompt: str,
623
- num_images: int,
624
- image_resolution: int,
625
- detect_resolution: int,
626
- num_steps: int,
627
- guidance_scale: float,
628
- seed: int,
629
- bg_threshold: float,
630
- is_normal_image: bool,
631
- ) -> list[PIL.Image.Image]:
632
- control_image, vis_control_image = self.preprocess_normal(
633
- input_image=input_image,
634
- image_resolution=image_resolution,
635
- detect_resolution=detect_resolution,
636
- bg_threshold=bg_threshold,
637
- is_normal_image=is_normal_image,
638
- )
639
- self.load_controlnet_weight('normal')
640
- results = self.run_pipe(
641
- prompt=self.get_prompt(prompt, additional_prompt),
642
- negative_prompt=negative_prompt,
643
- control_image=control_image,
644
- num_images=num_images,
645
- num_steps=num_steps,
646
- guidance_scale=guidance_scale,
647
- seed=seed,
648
- )
649
- return [vis_control_image] + results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # The original license file is LICENSE.ControlNet in this repo.
3
  from __future__ import annotations
4
 
 
5
  import pathlib
6
+ import random
7
+ import shlex
8
+ import subprocess
9
  import sys
10
 
11
  import cv2
12
+ import einops
13
  import numpy as np
 
14
  import torch
15
+ from pytorch_lightning import seed_everything
16
+
17
+ sys.path.append('ControlNet')
18
+
19
+ import config
20
+ from annotator.canny import apply_canny
21
+ from annotator.hed import apply_hed, nms
22
+ from annotator.midas import apply_midas
23
+ from annotator.mlsd import apply_mlsd
24
+ from annotator.openpose import apply_openpose
25
+ from annotator.uniformer import apply_uniformer
26
+ from annotator.util import HWC3, resize_image
27
+ from cldm.model import create_model, load_state_dict
28
+ from ldm.models.diffusion.ddim import DDIMSampler
29
+ from share import *
30
+
31
# Checkpoint file name for each ControlNet task. ``Model.get_weight_path``
# looks the file name up by task name and ``Model.download_models`` fetches
# every value listed here.
ORIGINAL_MODEL_NAMES = dict(
    canny='control_sd15_canny.pth',
    hough='control_sd15_mlsd.pth',
    hed='control_sd15_hed.pth',
    scribble='control_sd15_scribble.pth',
    pose='control_sd15_openpose.pth',
    seg='control_sd15_seg.pth',
    depth='control_sd15_depth.pth',
    normal='control_sd15_normal.pth',
)
# Base URL; a checkpoint file name is appended directly to it, so the
# trailing slash is required.
ORIGINAL_WEIGHT_ROOT = (
    'https://huggingface.co/lllyasviel/ControlNet/resolve/main/models/')
 
 
 
 
42
 
43
 
44
class Model:
    """ControlNet inference wrapper that swaps task-specific weights on demand.

    One model and one DDIM sampler are created in ``__init__``. Every
    ``process_*`` method loads the checkpoint for its task (unless it is
    already loaded), derives a control map from the input image and then runs
    the shared sampling pipeline in ``_sample``.

    Each ``process_*`` method returns a list whose first element is the
    control-map visualisation and whose remaining ``num_samples`` elements are
    the generated images.
    """

    def __init__(self,
                 model_config_path: str = 'ControlNet/models/cldm_v15.yaml',
                 model_dir: str = 'models'):
        """Create the model and sampler and fetch missing checkpoints.

        Args:
            model_config_path: Config file passed to ``create_model``.
            model_dir: Directory that holds the checkpoint files; it is
                created when it does not exist.
        """
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')
        self.model = create_model(model_config_path).to(self.device)
        self.ddim_sampler = DDIMSampler(self.model)
        # Name of the task whose weights are currently loaded ('' = none).
        self.task_name = ''

        self.model_dir = pathlib.Path(model_dir)
        self.model_dir.mkdir(exist_ok=True, parents=True)

        self.model_names = ORIGINAL_MODEL_NAMES
        self.weight_root = ORIGINAL_WEIGHT_ROOT
        self.download_models()

    def load_weight(self, task_name: str) -> None:
        """Load the checkpoint for ``task_name`` into the shared model.

        Does nothing when ``task_name`` is the task that is already loaded.
        """
        if task_name == self.task_name:
            return
        weight_path = self.get_weight_path(task_name)
        self.model.load_state_dict(
            load_state_dict(weight_path, location=self.device))
        self.task_name = task_name

    def get_weight_path(self, task_name: str) -> str:
        """Return the checkpoint path for ``task_name``.

        Any task name containing ``'scribble'`` maps to the single scribble
        checkpoint.

        Raises:
            KeyError: If the task has no entry in ``self.model_names``.
        """
        if 'scribble' in task_name:
            task_name = 'scribble'
        return f'{self.model_dir}/{self.model_names[task_name]}'

    def download_models(self) -> None:
        """Download every checkpoint in ``model_names`` that is not on disk.

        Downloads stay best-effort: a failed transfer is reported on stderr
        and the remaining checkpoints are still attempted. Unlike a direct
        ``wget -O <target>``, a failed transfer never leaves a file at the
        final path, so it is retried on the next start instead of being
        mistaken for a complete checkpoint.
        """
        self.model_dir.mkdir(exist_ok=True, parents=True)
        for name in self.model_names.values():
            out_path = self.model_dir / name
            if out_path.exists():
                continue
            # wget creates/truncates the -O target before the transfer starts,
            # so write to a temporary name and publish only complete files.
            tmp_path = out_path.with_name(f'{name}.part')
            url = f'{self.weight_root}{name}'
            # An argv list (not a split string) keeps paths containing
            # whitespace intact.
            completed = subprocess.run(['wget', url, '-O', str(tmp_path)])
            if completed.returncode == 0:
                tmp_path.replace(out_path)
                continue
            if tmp_path.exists():
                tmp_path.unlink()
            print(
                f'Failed to download {url} '
                f'(wget exit code {completed.returncode})',
                file=sys.stderr)

    @staticmethod
    def _resize_to_output(detected_map, input_image, image_resolution,
                          interpolation):
        """Resize ``detected_map`` to the output size of ``input_image``.

        The target size is whatever ``resize_image`` yields for
        ``input_image`` at ``image_resolution``.
        """
        height, width = resize_image(input_image, image_resolution).shape[:2]
        return cv2.resize(detected_map, (width, height),
                          interpolation=interpolation)

    def _sample(self, control_image: np.ndarray, prompt, a_prompt, n_prompt,
                num_samples, ddim_steps, scale, seed,
                eta) -> list[np.ndarray]:
        """Run the conditioning / DDIM sampling / decoding pipeline.

        Args:
            control_image: Control map as a 3-D ``(h, w, c)`` array with
                values in 0-255; it is copied, so negatively strided views
                are accepted.
            prompt: Main text prompt.
            a_prompt: Additional prompt appended to ``prompt`` after ``', '``.
            n_prompt: Negative prompt used for the unconditional branch.
            num_samples: Number of images to generate.
            ddim_steps: Number of DDIM steps.
            scale: Unconditional guidance scale.
            seed: RNG seed; ``-1`` draws a random seed in ``[0, 65535]``.
            eta: DDIM ``eta`` parameter.

        Returns:
            ``num_samples`` uint8 arrays in ``(h, w, c)`` layout.
        """
        height, width = control_image.shape[:2]

        # Put the control tensor on the device chosen in __init__ rather than
        # unconditionally on CUDA, so it always matches the model's device.
        control = torch.from_numpy(control_image.copy()).float().to(
            self.device) / 255.0
        control = torch.stack([control] * num_samples, dim=0)
        control = einops.rearrange(control, 'b h w c -> b c h w').clone()

        if seed == -1:
            seed = random.randint(0, 65535)
        seed_everything(seed)

        if config.save_memory:
            self.model.low_vram_shift(is_diffusing=False)

        cond = {
            'c_concat': [control],
            'c_crossattn': [
                self.model.get_learned_conditioning(
                    [prompt + ', ' + a_prompt] * num_samples)
            ]
        }
        un_cond = {
            'c_concat': [control],
            'c_crossattn':
            [self.model.get_learned_conditioning([n_prompt] * num_samples)]
        }
        # 4 latent channels at 1/8 of the control map's height and width.
        shape = (4, height // 8, width // 8)

        if config.save_memory:
            self.model.low_vram_shift(is_diffusing=True)

        samples, _ = self.ddim_sampler.sample(
            ddim_steps,
            num_samples,
            shape,
            cond,
            verbose=False,
            eta=eta,
            unconditional_guidance_scale=scale,
            unconditional_conditioning=un_cond)

        if config.save_memory:
            self.model.low_vram_shift(is_diffusing=False)

        x_samples = self.model.decode_first_stage(samples)
        # Map the decoder output from [-1, 1] to [0, 255] uint8 images.
        x_samples = (
            einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
            127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

        return [x_samples[i] for i in range(num_samples)]

    @torch.inference_mode()
    def process_canny(self, input_image, prompt, a_prompt, n_prompt,
                      num_samples, image_resolution, ddim_steps, scale, seed,
                      eta, low_threshold, high_threshold):
        """Generate images conditioned on Canny edges of ``input_image``.

        Returns the inverted edge map followed by the generated images.
        """
        self.load_weight('canny')

        img = resize_image(HWC3(input_image), image_resolution)
        detected_map = HWC3(apply_canny(img, low_threshold, high_threshold))

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [255 - detected_map] + results

    @torch.inference_mode()
    def process_hough(self, input_image, prompt, a_prompt, n_prompt,
                      num_samples, image_resolution, detect_resolution,
                      ddim_steps, scale, seed, eta, value_threshold,
                      distance_threshold):
        """Generate images conditioned on M-LSD line detection.

        Returns the dilated, inverted line map followed by the generated
        images.
        """
        self.load_weight('hough')

        input_image = HWC3(input_image)
        detected_map = apply_mlsd(resize_image(input_image, detect_resolution),
                                  value_threshold, distance_threshold)
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_NEAREST)

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [
            255 - cv2.dilate(detected_map,
                             np.ones(shape=(3, 3), dtype=np.uint8),
                             iterations=1)
        ] + results

    @torch.inference_mode()
    def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples,
                    image_resolution, detect_resolution, ddim_steps, scale,
                    seed, eta):
        """Generate images conditioned on HED edges of ``input_image``.

        Returns the edge map followed by the generated images.
        """
        self.load_weight('hed')

        input_image = HWC3(input_image)
        detected_map = apply_hed(resize_image(input_image, detect_resolution))
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_LINEAR)

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [detected_map] + results

    @torch.inference_mode()
    def process_scribble(self, input_image, prompt, a_prompt, n_prompt,
                         num_samples, image_resolution, ddim_steps, scale,
                         seed, eta):
        """Generate images conditioned on a scribble image.

        Pixels whose minimum channel value is below 127 are treated as
        strokes. Returns the inverted stroke map followed by the generated
        images.
        """
        self.load_weight('scribble')

        img = resize_image(HWC3(input_image), image_resolution)
        detected_map = np.zeros_like(img, dtype=np.uint8)
        detected_map[np.min(img, axis=2) < 127] = 255

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [255 - detected_map] + results

    @torch.inference_mode()
    def process_scribble_interactive(self, input_image, prompt, a_prompt,
                                     n_prompt, num_samples, image_resolution,
                                     ddim_steps, scale, seed, eta):
        """Generate images conditioned on an interactively drawn scribble.

        ``input_image`` is indexed as a mapping: only the first channel of
        its ``'mask'`` entry is used, and pixels above 127 are treated as
        strokes. Returns the inverted stroke map followed by the generated
        images.
        """
        self.load_weight('scribble')

        img = resize_image(HWC3(input_image['mask'][:, :, 0]),
                           image_resolution)
        detected_map = np.zeros_like(img, dtype=np.uint8)
        detected_map[np.min(img, axis=2) > 127] = 255

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [255 - detected_map] + results

    @torch.inference_mode()
    def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt,
                              num_samples, image_resolution, detect_resolution,
                              ddim_steps, scale, seed, eta):
        """Generate images conditioned on a scribble synthesised from HED.

        The HED map is passed through ``nms``, blurred and binarised before
        it is used as the control map. Returns the inverted control map
        followed by the generated images.
        """
        self.load_weight('scribble')

        input_image = HWC3(input_image)
        detected_map = apply_hed(resize_image(input_image, detect_resolution))
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_LINEAR)
        detected_map = nms(detected_map, 127, 3.0)
        detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
        # Binarise: everything above 4 becomes a stroke, the rest background.
        detected_map[detected_map > 4] = 255
        detected_map[detected_map < 255] = 0

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [255 - detected_map] + results

    @torch.inference_mode()
    def process_pose(self, input_image, prompt, a_prompt, n_prompt,
                     num_samples, image_resolution, detect_resolution,
                     ddim_steps, scale, seed, eta):
        """Generate images conditioned on an OpenPose map of ``input_image``.

        Returns the pose map followed by the generated images.
        """
        self.load_weight('pose')

        input_image = HWC3(input_image)
        detected_map, _ = apply_openpose(
            resize_image(input_image, detect_resolution))
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_NEAREST)

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [detected_map] + results

    @torch.inference_mode()
    def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples,
                    image_resolution, detect_resolution, ddim_steps, scale,
                    seed, eta):
        """Generate images conditioned on a UniFormer segmentation map.

        Returns the segmentation map followed by the generated images.
        """
        self.load_weight('seg')

        input_image = HWC3(input_image)
        detected_map = apply_uniformer(
            resize_image(input_image, detect_resolution))
        detected_map = self._resize_to_output(detected_map, input_image,
                                              image_resolution,
                                              cv2.INTER_NEAREST)

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [detected_map] + results

    @torch.inference_mode()
    def process_depth(self, input_image, prompt, a_prompt, n_prompt,
                      num_samples, image_resolution, detect_resolution,
                      ddim_steps, scale, seed, eta):
        """Generate images conditioned on a MiDaS depth map.

        Returns the depth map followed by the generated images.
        """
        self.load_weight('depth')

        input_image = HWC3(input_image)
        detected_map, _ = apply_midas(
            resize_image(input_image, detect_resolution))
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_LINEAR)

        results = self._sample(detected_map, prompt, a_prompt, n_prompt,
                               num_samples, ddim_steps, scale, seed, eta)
        return [detected_map] + results

    @torch.inference_mode()
    def process_normal(self, input_image, prompt, a_prompt, n_prompt,
                       num_samples, image_resolution, detect_resolution,
                       ddim_steps, scale, seed, eta, bg_threshold):
        """Generate images conditioned on a MiDaS normal map.

        The model is fed the normal map with its channel order reversed,
        while the returned visualisation keeps the original channel order.
        Returns the normal map followed by the generated images.
        """
        self.load_weight('normal')

        input_image = HWC3(input_image)
        _, detected_map = apply_midas(resize_image(input_image,
                                                   detect_resolution),
                                      bg_th=bg_threshold)
        detected_map = self._resize_to_output(HWC3(detected_map), input_image,
                                              image_resolution,
                                              cv2.INTER_LINEAR)

        # Reverse the channel axis for conditioning only.
        results = self._sample(detected_map[:, :, ::-1], prompt, a_prompt,
                               n_prompt, num_samples, ddim_steps, scale, seed,
                               eta)
        return [detected_map] + results
notebooks/notebook.ipynb DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {
7
- "id": "8CnkIPtjn8Dc"
8
- },
9
- "outputs": [],
10
- "source": [
11
- "!git clone --recursive https://huggingface.co/spaces/hysts/ControlNet"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": null,
17
- "metadata": {
18
- "id": "IZlaYNTWoFPK"
19
- },
20
- "outputs": [],
21
- "source": [
22
- "%cd ControlNet"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": null,
28
- "metadata": {
29
- "id": "0zhLFnZUoWdp"
30
- },
31
- "outputs": [],
32
- "source": [
33
- "!cd ControlNet && git apply ../patch && cd .."
34
- ]
35
- },
36
- {
37
- "cell_type": "code",
38
- "execution_count": null,
39
- "metadata": {
40
- "id": "P_fzYrLvoIcI"
41
- },
42
- "outputs": [],
43
- "source": [
44
- "!pip install -q -r requirements.txt"
45
- ]
46
- },
47
- {
48
- "cell_type": "code",
49
- "execution_count": null,
50
- "metadata": {
51
- "id": "GOfGng5Woktd"
52
- },
53
- "outputs": [],
54
- "source": [
55
- "import app"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": null,
61
- "metadata": {
62
- "id": "7Cued230ol7T"
63
- },
64
- "outputs": [],
65
- "source": []
66
- }
67
- ],
68
- "metadata": {
69
- "accelerator": "GPU",
70
- "colab": {
71
- "provenance": []
72
- },
73
- "gpuClass": "standard",
74
- "language_info": {
75
- "name": "python"
76
- }
77
- },
78
- "nbformat": 4,
79
- "nbformat_minor": 0
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
patch CHANGED
@@ -113,16 +113,3 @@ index 500e53c..4061dbe 100644
113
  model = init_segmentor(config_file, checkpoint_file).cuda()
114
 
115
 
116
- diff --git a/annotator/util.py b/annotator/util.py
117
- index 7cde937..10a6d58 100644
118
- --- a/annotator/util.py
119
- +++ b/annotator/util.py
120
- @@ -25,7 +25,7 @@ def resize_image(input_image, resolution):
121
- H, W, C = input_image.shape
122
- H = float(H)
123
- W = float(W)
124
- - k = float(resolution) / min(H, W)
125
- + k = float(resolution) / max(H, W)
126
- H *= k
127
- W *= k
128
- H = int(np.round(H / 64.0)) * 64
 
113
  model = init_segmentor(config_file, checkpoint_file).cuda()
114
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,9 +1,7 @@
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
- git+https://github.com/huggingface/accelerate@78151f8
5
- git+https://github.com/huggingface/diffusers@fa6d52d
6
- gradio==3.36.1
7
  imageio==2.25.0
8
  imageio-ffmpeg==0.4.8
9
  kornia==0.6.9
@@ -18,5 +16,4 @@ timm==0.6.12
18
  torch==1.13.1
19
  torchvision==0.14.1
20
  transformers==4.26.1
21
- xformers==0.0.16
22
  yapf==0.32.0
 
1
  addict==2.4.0
2
  albumentations==1.3.0
3
  einops==0.6.0
4
+ gradio==3.18.0
 
 
5
  imageio==2.25.0
6
  imageio-ffmpeg==0.4.8
7
  kornia==0.6.9
 
16
  torch==1.13.1
17
  torchvision==0.14.1
18
  transformers==4.26.1
 
19
  yapf==0.32.0
style.css CHANGED
@@ -1,8 +1,3 @@
1
  h1 {
2
  text-align: center;
3
  }
4
-
5
- .note {
6
- text-align: center;
7
- font-size: 150%;
8
- }
 
1
  h1 {
2
  text-align: center;
3
  }