hysts committed on
Commit
a7a3cc5
1 Parent(s): 82b80d8
.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ models/
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "ControlNet"]
2
+ path = ControlNet
3
+ url = https://github.com/lllyasviel/ControlNet
.pre-commit-config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exclude: patch
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: v4.2.0
5
+ hooks:
6
+ - id: check-executables-have-shebangs
7
+ - id: check-json
8
+ - id: check-merge-conflict
9
+ - id: check-shebang-scripts-are-executable
10
+ - id: check-toml
11
+ - id: check-yaml
12
+ - id: double-quote-string-fixer
13
+ - id: end-of-file-fixer
14
+ - id: mixed-line-ending
15
+ args: ['--fix=lf']
16
+ - id: requirements-txt-fixer
17
+ - id: trailing-whitespace
18
+ - repo: https://github.com/myint/docformatter
19
+ rev: v1.4
20
+ hooks:
21
+ - id: docformatter
22
+ args: ['--in-place']
23
+ - repo: https://github.com/pycqa/isort
24
+ rev: 5.12.0
25
+ hooks:
26
+ - id: isort
27
+ - repo: https://github.com/pre-commit/mirrors-mypy
28
+ rev: v0.991
29
+ hooks:
30
+ - id: mypy
31
+ args: ['--ignore-missing-imports']
32
+ additional_dependencies: ['types-python-slugify']
33
+ - repo: https://github.com/google/yapf
34
+ rev: v0.32.0
35
+ hooks:
36
+ - id: yapf
37
+ args: ['--parallel', '--in-place']
.style.yapf ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [style]
2
+ based_on_style = pep8
3
+ blank_line_before_nested_class_or_def = false
4
+ spaces_before_comment = 2
5
+ split_before_logical_operator = true
ControlNet ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit f4748e3630d8141d7765e2bd9b1e348f47847707
LICENSE.ControlNet ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.18.0
 
8
  app_file: app.py
9
  pinned: false
10
  ---
 
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.18.0
8
+ python_version: 3.10.9
9
  app_file: app.py
10
  pinned: false
11
  ---
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import shlex
7
+ import subprocess
8
+
9
+ import gradio as gr
10
+
11
+ if os.getenv('SYSTEM') == 'spaces':
12
+ with open('patch') as f:
13
+ subprocess.run(shlex.split('patch -p1'), stdin=f, cwd='ControlNet')
14
+ commands = [
15
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/dpt_hybrid-midas-501f0c75.pt -O dpt_hybrid-midas-501f0c75.pt',
16
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/body_pose_model.pth -O body_pose_model.pth',
17
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/hand_pose_model.pth -O hand_pose_model.pth',
18
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/mlsd_large_512_fp32.pth -O mlsd_large_512_fp32.pth',
19
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/mlsd_tiny_512_fp32.pth -O mlsd_tiny_512_fp32.pth',
20
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/network-bsds500.pth -O network-bsds500.pth',
21
+ 'wget https://huggingface.co/ckpt/ControlNet/resolve/main/upernet_global_small.pth -O upernet_global_small.pth',
22
+ ]
23
+ for command in commands:
24
+ subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/')
25
+
26
+ from gradio_canny2image import create_demo as create_demo_canny
27
+ from gradio_depth2image import create_demo as create_demo_depth
28
+ from gradio_fake_scribble2image import create_demo as create_demo_fake_scribble
29
+ from gradio_hed2image import create_demo as create_demo_hed
30
+ from gradio_hough2image import create_demo as create_demo_hough
31
+ from gradio_normal2image import create_demo as create_demo_normal
32
+ from gradio_pose2image import create_demo as create_demo_pose
33
+ from gradio_scribble2image import create_demo as create_demo_scribble
34
+ from gradio_scribble2image_interactive import \
35
+ create_demo as create_demo_scribble_interactive
36
+ from gradio_seg2image import create_demo as create_demo_seg
37
+ from model import Model
38
+
39
+ DESCRIPTION = '''# ControlNet
40
+
41
+ This is an unofficial demo for [https://github.com/lllyasviel/ControlNet](https://github.com/lllyasviel/ControlNet).
42
+ '''
43
+
44
+ model = Model()
45
+
46
+ with gr.Blocks(css='style.css') as demo:
47
+ gr.Markdown(DESCRIPTION)
48
+ with gr.Tabs():
49
+ with gr.TabItem('Canny'):
50
+ create_demo_canny(model.process_canny)
51
+ with gr.TabItem('Hough'):
52
+ create_demo_hough(model.process_hough)
53
+ with gr.TabItem('HED'):
54
+ create_demo_hed(model.process_hed)
55
+ with gr.TabItem('Scribble'):
56
+ create_demo_scribble(model.process_scribble)
57
+ with gr.TabItem('Scribble Interactive'):
58
+ create_demo_scribble_interactive(
59
+ model.process_scribble_interactive)
60
+ with gr.TabItem('Fake Scribble'):
61
+ create_demo_fake_scribble(model.process_fake_scribble)
62
+ with gr.TabItem('Pose'):
63
+ create_demo_pose(model.process_pose)
64
+ with gr.TabItem('Segmentation'):
65
+ create_demo_seg(model.process_seg)
66
+ with gr.TabItem('Depth'):
67
+ create_demo_depth(model.process_depth)
68
+ with gr.TabItem('Normal map'):
69
+ create_demo_normal(model.process_normal)
70
+
71
+ demo.queue().launch()
gradio_canny2image.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_canny2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio UI for Canny-edge-conditioned generation.

    Args:
        process: Callback run when the button is clicked. It receives, in
            order: input image, prompt, added prompt, negative prompt,
            number of images, image resolution, DDIM steps, guidance scale,
            seed, eta, Canny low threshold and Canny high threshold. Its
            return value fills the output gallery.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Canny Edge Maps')
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', type='numpy')
                prompt = gr.Textbox(label='Prompt')
                # The caption of a Button is its `value`; `label` is not a
                # Button parameter.
                run_button = gr.Button(value='Run')
                with gr.Accordion('Advanced options', open=False):
                    num_samples = gr.Slider(label='Images',
                                            minimum=1,
                                            maximum=12,
                                            value=1,
                                            step=1)
                    image_resolution = gr.Slider(label='Image Resolution',
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=256)
                    low_threshold = gr.Slider(label='Canny low threshold',
                                              minimum=1,
                                              maximum=255,
                                              value=100,
                                              step=1)
                    high_threshold = gr.Slider(label='Canny high threshold',
                                               minimum=1,
                                               maximum=255,
                                               value=200,
                                               step=1)
                    ddim_steps = gr.Slider(label='Steps',
                                           minimum=1,
                                           maximum=100,
                                           value=20,
                                           step=1)
                    scale = gr.Slider(label='Guidance Scale',
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=9.0,
                                      step=0.1)
                    seed = gr.Slider(label='Seed',
                                     minimum=-1,
                                     maximum=2147483647,
                                     step=1,
                                     randomize=True)
                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                    a_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    n_prompt = gr.Textbox(
                        label='Negative Prompt',
                        value=
                        'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                    )
            with gr.Column():
                result_gallery = gr.Gallery(label='Output',
                                            show_label=False,
                                            elem_id='gallery').style(
                                                grid=2, height='auto')
        # The order of this list is the positional argument order of `process`.
        ips = [
            input_image, prompt, a_prompt, n_prompt, num_samples,
            image_resolution, ddim_steps, scale, seed, eta, low_threshold,
            high_threshold
        ]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    return demo
gradio_depth2image.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_depth2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio UI for depth-map-conditioned generation.

    Args:
        process: Callback run when the button is clicked. It receives, in
            order: input image, prompt, added prompt, negative prompt,
            number of images, image resolution, depth (detection)
            resolution, DDIM steps, guidance scale, seed and eta. Its return
            value fills the output gallery.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Depth Maps')
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', type='numpy')
                prompt = gr.Textbox(label='Prompt')
                # The caption of a Button is its `value`; `label` is not a
                # Button parameter.
                run_button = gr.Button(value='Run')
                with gr.Accordion('Advanced options', open=False):
                    num_samples = gr.Slider(label='Images',
                                            minimum=1,
                                            maximum=12,
                                            value=1,
                                            step=1)
                    image_resolution = gr.Slider(label='Image Resolution',
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=256)
                    detect_resolution = gr.Slider(label='Depth Resolution',
                                                  minimum=128,
                                                  maximum=1024,
                                                  value=384,
                                                  step=1)
                    ddim_steps = gr.Slider(label='Steps',
                                           minimum=1,
                                           maximum=100,
                                           value=20,
                                           step=1)
                    scale = gr.Slider(label='Guidance Scale',
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=9.0,
                                      step=0.1)
                    seed = gr.Slider(label='Seed',
                                     minimum=-1,
                                     maximum=2147483647,
                                     step=1,
                                     randomize=True)
                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                    a_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    n_prompt = gr.Textbox(
                        label='Negative Prompt',
                        value=
                        'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                    )
            with gr.Column():
                result_gallery = gr.Gallery(label='Output',
                                            show_label=False,
                                            elem_id='gallery').style(
                                                grid=2, height='auto')
        # The order of this list is the positional argument order of `process`.
        ips = [
            input_image, prompt, a_prompt, n_prompt, num_samples,
            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
        ]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    return demo
gradio_fake_scribble2image.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_fake_scribble2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio UI for fake-scribble-conditioned generation.

    Args:
        process: Callback run when the button is clicked. It receives, in
            order: input image, prompt, added prompt, negative prompt,
            number of images, image resolution, HED (detection) resolution,
            DDIM steps, guidance scale, seed and eta. Its return value fills
            the output gallery.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Fake Scribble Maps')
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', type='numpy')
                prompt = gr.Textbox(label='Prompt')
                # The caption of a Button is its `value`; `label` is not a
                # Button parameter.
                run_button = gr.Button(value='Run')
                with gr.Accordion('Advanced options', open=False):
                    num_samples = gr.Slider(label='Images',
                                            minimum=1,
                                            maximum=12,
                                            value=1,
                                            step=1)
                    image_resolution = gr.Slider(label='Image Resolution',
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=256)
                    detect_resolution = gr.Slider(label='HED Resolution',
                                                  minimum=128,
                                                  maximum=1024,
                                                  value=512,
                                                  step=1)
                    ddim_steps = gr.Slider(label='Steps',
                                           minimum=1,
                                           maximum=100,
                                           value=20,
                                           step=1)
                    scale = gr.Slider(label='Guidance Scale',
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=9.0,
                                      step=0.1)
                    seed = gr.Slider(label='Seed',
                                     minimum=-1,
                                     maximum=2147483647,
                                     step=1,
                                     randomize=True)
                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                    a_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    n_prompt = gr.Textbox(
                        label='Negative Prompt',
                        value=
                        'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                    )
            with gr.Column():
                result_gallery = gr.Gallery(label='Output',
                                            show_label=False,
                                            elem_id='gallery').style(
                                                grid=2, height='auto')
        # The order of this list is the positional argument order of `process`.
        ips = [
            input_image, prompt, a_prompt, n_prompt, num_samples,
            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
        ]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    return demo
gradio_hed2image.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hed2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio UI for HED-map-conditioned generation.

    Args:
        process: Callback run when the button is clicked. It receives, in
            order: input image, prompt, added prompt, negative prompt,
            number of images, image resolution, HED (detection) resolution,
            DDIM steps, guidance scale, seed and eta. Its return value fills
            the output gallery.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with HED Maps')
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', type='numpy')
                prompt = gr.Textbox(label='Prompt')
                # The caption of a Button is its `value`; `label` is not a
                # Button parameter.
                run_button = gr.Button(value='Run')
                with gr.Accordion('Advanced options', open=False):
                    num_samples = gr.Slider(label='Images',
                                            minimum=1,
                                            maximum=12,
                                            value=1,
                                            step=1)
                    image_resolution = gr.Slider(label='Image Resolution',
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=256)
                    detect_resolution = gr.Slider(label='HED Resolution',
                                                  minimum=128,
                                                  maximum=1024,
                                                  value=512,
                                                  step=1)
                    ddim_steps = gr.Slider(label='Steps',
                                           minimum=1,
                                           maximum=100,
                                           value=20,
                                           step=1)
                    scale = gr.Slider(label='Guidance Scale',
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=9.0,
                                      step=0.1)
                    seed = gr.Slider(label='Seed',
                                     minimum=-1,
                                     maximum=2147483647,
                                     step=1,
                                     randomize=True)
                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                    a_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    n_prompt = gr.Textbox(
                        label='Negative Prompt',
                        value=
                        'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                    )
            with gr.Column():
                result_gallery = gr.Gallery(label='Output',
                                            show_label=False,
                                            elem_id='gallery').style(
                                                grid=2, height='auto')
        # The order of this list is the positional argument order of `process`.
        ips = [
            input_image, prompt, a_prompt, n_prompt, num_samples,
            image_resolution, detect_resolution, ddim_steps, scale, seed, eta
        ]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    return demo
gradio_hough2image.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_hough2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio UI for Hough-line-conditioned generation.

    Args:
        process: Callback run when the button is clicked. It receives, in
            order: input image, prompt, added prompt, negative prompt,
            number of images, image resolution, Hough (detection)
            resolution, DDIM steps, guidance scale, seed, eta, MLSD value
            threshold and MLSD distance threshold. Its return value fills
            the output gallery.

    Returns:
        The ``gr.Blocks`` instance containing the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Hough Line Maps')
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(source='upload', type='numpy')
                prompt = gr.Textbox(label='Prompt')
                # The caption of a Button is its `value`; `label` is not a
                # Button parameter.
                run_button = gr.Button(value='Run')
                with gr.Accordion('Advanced options', open=False):
                    num_samples = gr.Slider(label='Images',
                                            minimum=1,
                                            maximum=12,
                                            value=1,
                                            step=1)
                    image_resolution = gr.Slider(label='Image Resolution',
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=256)
                    detect_resolution = gr.Slider(label='Hough Resolution',
                                                  minimum=128,
                                                  maximum=1024,
                                                  value=512,
                                                  step=1)
                    value_threshold = gr.Slider(
                        label='Hough value threshold (MLSD)',
                        minimum=0.01,
                        maximum=2.0,
                        value=0.1,
                        step=0.01)
                    distance_threshold = gr.Slider(
                        label='Hough distance threshold (MLSD)',
                        minimum=0.01,
                        maximum=20.0,
                        value=0.1,
                        step=0.01)
                    ddim_steps = gr.Slider(label='Steps',
                                           minimum=1,
                                           maximum=100,
                                           value=20,
                                           step=1)
                    scale = gr.Slider(label='Guidance Scale',
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=9.0,
                                      step=0.1)
                    seed = gr.Slider(label='Seed',
                                     minimum=-1,
                                     maximum=2147483647,
                                     step=1,
                                     randomize=True)
                    eta = gr.Number(label='eta (DDIM)', value=0.0)
                    a_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    n_prompt = gr.Textbox(
                        label='Negative Prompt',
                        value=
                        'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
                    )
            with gr.Column():
                result_gallery = gr.Gallery(label='Output',
                                            show_label=False,
                                            elem_id='gallery').style(
                                                grid=2, height='auto')
        # The order of this list is the positional argument order of `process`.
        ips = [
            input_image, prompt, a_prompt, n_prompt, num_samples,
            image_resolution, detect_resolution, ddim_steps, scale, seed, eta,
            value_threshold, distance_threshold
        ]
        run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    return demo
gradio_normal2image.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_normal2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio Blocks UI for the normal-map ControlNet demo.

    Args:
        process: Callback wired to the Run button. It is called with the
            input image, prompt, added prompt, negative prompt, number of
            images, image resolution, normal resolution, steps, guidance
            scale, seed, eta and background threshold, in that order. Its
            output is shown in the gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    default_negative = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Normal Maps')
        with gr.Row():
            # Left column: inputs and tuning controls.
            with gr.Column():
                image_in = gr.Image(source='upload', type='numpy')
                prompt_box = gr.Textbox(label='Prompt')
                run_btn = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    n_images = gr.Slider(
                        label='Images', minimum=1, maximum=12, value=1, step=1)
                    out_res = gr.Slider(
                        label='Image Resolution',
                        minimum=256, maximum=768, value=512, step=256)
                    det_res = gr.Slider(
                        label='Normal Resolution',
                        minimum=128, maximum=1024, value=384, step=1)
                    bg_thr = gr.Slider(
                        label='Normal background threshold',
                        minimum=0.0, maximum=1.0, value=0.4, step=0.01)
                    steps = gr.Slider(
                        label='Steps', minimum=1, maximum=100, value=20, step=1)
                    guidance = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                    seed_in = gr.Slider(
                        label='Seed',
                        minimum=-1, maximum=2147483647, step=1, randomize=True)
                    eta_in = gr.Number(label='eta (DDIM)', value=0.0)
                    added_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    negative_prompt = gr.Textbox(
                        label='Negative Prompt', value=default_negative)
            # Right column: generated images.
            with gr.Column():
                gallery = gr.Gallery(
                    label='Output', show_label=False, elem_id='gallery')
                gallery = gallery.style(grid=2, height='auto')

        # The order here is the positional argument order of `process`.
        run_btn.click(
            fn=process,
            inputs=[
                image_in, prompt_box, added_prompt, negative_prompt, n_images,
                out_res, det_res, steps, guidance, seed_in, eta_in, bg_thr,
            ],
            outputs=[gallery])
    return demo
gradio_pose2image.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_pose2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio Blocks UI for the human-pose ControlNet demo.

    Args:
        process: Callback wired to the Run button. It is called with the
            input image, prompt, added prompt, negative prompt, number of
            images, image resolution, OpenPose resolution, steps, guidance
            scale, seed and eta, in that order. Its output is shown in the
            gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    default_negative = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Human Pose')
        with gr.Row():
            # Left column: inputs and tuning controls.
            with gr.Column():
                image_in = gr.Image(source='upload', type='numpy')
                prompt_box = gr.Textbox(label='Prompt')
                run_btn = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    n_images = gr.Slider(
                        label='Images', minimum=1, maximum=12, value=1, step=1)
                    out_res = gr.Slider(
                        label='Image Resolution',
                        minimum=256, maximum=768, value=512, step=256)
                    det_res = gr.Slider(
                        label='OpenPose Resolution',
                        minimum=128, maximum=1024, value=512, step=1)
                    steps = gr.Slider(
                        label='Steps', minimum=1, maximum=100, value=20, step=1)
                    guidance = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                    seed_in = gr.Slider(
                        label='Seed',
                        minimum=-1, maximum=2147483647, step=1, randomize=True)
                    eta_in = gr.Number(label='eta (DDIM)', value=0.0)
                    added_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    negative_prompt = gr.Textbox(
                        label='Negative Prompt', value=default_negative)
            # Right column: generated images.
            with gr.Column():
                gallery = gr.Gallery(
                    label='Output', show_label=False, elem_id='gallery')
                gallery = gallery.style(grid=2, height='auto')

        # The order here is the positional argument order of `process`.
        run_btn.click(
            fn=process,
            inputs=[
                image_in, prompt_box, added_prompt, negative_prompt, n_images,
                out_res, det_res, steps, guidance, seed_in, eta_in,
            ],
            outputs=[gallery])
    return demo
gradio_scribble2image.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio Blocks UI for the scribble-map ControlNet demo.

    Args:
        process: Callback wired to the Run button. It is called with the
            input image, prompt, added prompt, negative prompt, number of
            images, image resolution, steps, guidance scale, seed and eta,
            in that order. Its output is shown in the gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    default_negative = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Scribble Maps')
        with gr.Row():
            # Left column: inputs and tuning controls.
            with gr.Column():
                image_in = gr.Image(source='upload', type='numpy')
                prompt_box = gr.Textbox(label='Prompt')
                run_btn = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    n_images = gr.Slider(
                        label='Images', minimum=1, maximum=12, value=1, step=1)
                    out_res = gr.Slider(
                        label='Image Resolution',
                        minimum=256, maximum=768, value=512, step=256)
                    steps = gr.Slider(
                        label='Steps', minimum=1, maximum=100, value=20, step=1)
                    guidance = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                    seed_in = gr.Slider(
                        label='Seed',
                        minimum=-1, maximum=2147483647, step=1, randomize=True)
                    eta_in = gr.Number(label='eta (DDIM)', value=0.0)
                    added_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    negative_prompt = gr.Textbox(
                        label='Negative Prompt', value=default_negative)
            # Right column: generated images.
            with gr.Column():
                gallery = gr.Gallery(
                    label='Output', show_label=False, elem_id='gallery')
                gallery = gallery.style(grid=2, height='auto')

        # The order here is the positional argument order of `process`.
        run_btn.click(
            fn=process,
            inputs=[
                image_in, prompt_box, added_prompt, negative_prompt, n_images,
                out_res, steps, guidance, seed_in, eta_in,
            ],
            outputs=[gallery])
    return demo
gradio_scribble2image_interactive.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_scribble2image_interactive.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+ import numpy as np
5
+
6
+
7
def create_canvas(w, h):
    """Return an ``h`` x ``w`` 3-channel uint8 array with every value 255."""
    canvas_shape = (h, w, 3)
    return np.full(canvas_shape, 255, dtype=np.uint8)
9
+
10
+
11
def create_demo(process):
    """Build the Gradio Blocks UI for the interactive-scribble ControlNet demo.

    The "Open drawing canvas!" button fills the sketch-enabled image widget
    with the output of ``create_canvas`` at the chosen width and height.

    Args:
        process: Callback wired to the Run button. It is called with the
            sketch image value, prompt, added prompt, negative prompt, number
            of images, image resolution, steps, guidance scale, seed and eta,
            in that order. Its output is shown in the gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    default_negative = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
    brush_hint = (
        'Do not forget to change your brush width to make it thinner. (Gradio do not allow developers to set brush width so you need to do it manually.) '
        'Just click on the small pencil icon in the upper right corner of the above block.'
    )

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown(
                '## Control Stable Diffusion with Interactive Scribbles')
        with gr.Row():
            # Left column: canvas setup, prompt and tuning controls.
            with gr.Column():
                width_in = gr.Slider(
                    label='Canvas Width',
                    minimum=256, maximum=1024, value=512, step=1)
                height_in = gr.Slider(
                    label='Canvas Height',
                    minimum=256, maximum=1024, value=512, step=1)
                canvas_btn = gr.Button(
                    label='Start', value='Open drawing canvas!')
                sketch_in = gr.Image(
                    source='upload', type='numpy', tool='sketch')
                gr.Markdown(value=brush_hint)
                canvas_btn.click(
                    fn=create_canvas,
                    inputs=[width_in, height_in],
                    outputs=[sketch_in])
                prompt_box = gr.Textbox(label='Prompt')
                run_btn = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    n_images = gr.Slider(
                        label='Images', minimum=1, maximum=12, value=1, step=1)
                    out_res = gr.Slider(
                        label='Image Resolution',
                        minimum=256, maximum=768, value=512, step=256)
                    steps = gr.Slider(
                        label='Steps', minimum=1, maximum=100, value=20, step=1)
                    guidance = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                    seed_in = gr.Slider(
                        label='Seed',
                        minimum=-1, maximum=2147483647, step=1, randomize=True)
                    eta_in = gr.Number(label='eta (DDIM)', value=0.0)
                    added_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    negative_prompt = gr.Textbox(
                        label='Negative Prompt', value=default_negative)
            # Right column: generated images.
            with gr.Column():
                gallery = gr.Gallery(
                    label='Output', show_label=False, elem_id='gallery')
                gallery = gallery.style(grid=2, height='auto')

        # The order here is the positional argument order of `process`.
        run_btn.click(
            fn=process,
            inputs=[
                sketch_in, prompt_box, added_prompt, negative_prompt, n_images,
                out_res, steps, guidance, seed_in, eta_in,
            ],
            outputs=[gallery])
    return demo
gradio_seg2image.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from https://github.com/lllyasviel/ControlNet/blob/f4748e3630d8141d7765e2bd9b1e348f47847707/gradio_seg2image.py
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ import gradio as gr
4
+
5
+
6
def create_demo(process):
    """Build the Gradio Blocks UI for the segmentation-map ControlNet demo.

    Args:
        process: Callback wired to the Run button. It is called with the
            input image, prompt, added prompt, negative prompt, number of
            images, image resolution, segmentation resolution, steps,
            guidance scale, seed and eta, in that order. Its output is shown
            in the gallery.

    Returns:
        The constructed ``gr.Blocks`` object (not launched).
    """
    default_negative = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Control Stable Diffusion with Segmentation Maps')
        with gr.Row():
            # Left column: inputs and tuning controls.
            with gr.Column():
                image_in = gr.Image(source='upload', type='numpy')
                prompt_box = gr.Textbox(label='Prompt')
                run_btn = gr.Button(label='Run')
                with gr.Accordion('Advanced options', open=False):
                    n_images = gr.Slider(
                        label='Images', minimum=1, maximum=12, value=1, step=1)
                    out_res = gr.Slider(
                        label='Image Resolution',
                        minimum=256, maximum=768, value=512, step=256)
                    det_res = gr.Slider(
                        label='Segmentation Resolution',
                        minimum=128, maximum=1024, value=512, step=1)
                    steps = gr.Slider(
                        label='Steps', minimum=1, maximum=100, value=20, step=1)
                    guidance = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                    seed_in = gr.Slider(
                        label='Seed',
                        minimum=-1, maximum=2147483647, step=1, randomize=True)
                    eta_in = gr.Number(label='eta (DDIM)', value=0.0)
                    added_prompt = gr.Textbox(
                        label='Added Prompt',
                        value='best quality, extremely detailed')
                    negative_prompt = gr.Textbox(
                        label='Negative Prompt', value=default_negative)
            # Right column: generated images.
            with gr.Column():
                gallery = gr.Gallery(
                    label='Output', show_label=False, elem_id='gallery')
                gallery = gallery.style(grid=2, height='auto')

        # The order here is the positional argument order of `process`.
        run_btn.click(
            fn=process,
            inputs=[
                image_in, prompt_box, added_prompt, negative_prompt, n_images,
                out_res, det_res, steps, guidance, seed_in, eta_in,
            ],
            outputs=[gallery])
    return demo
model.py ADDED
@@ -0,0 +1,725 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is adapted from gradio_*.py in https://github.com/lllyasviel/ControlNet/tree/f4748e3630d8141d7765e2bd9b1e348f47847707
2
+ # The original license file is LICENSE.ControlNet in this repo.
3
+ from __future__ import annotations
4
+
5
+ import pathlib
6
+ import random
7
+ import shlex
8
+ import subprocess
9
+ import sys
10
+
11
+ import cv2
12
+ import einops
13
+ import numpy as np
14
+ import torch
15
+ from pytorch_lightning import seed_everything
16
+
17
+ sys.path.append('ControlNet')
18
+
19
+ import config
20
+ from annotator.canny import apply_canny
21
+ from annotator.hed import apply_hed, nms
22
+ from annotator.midas import apply_midas
23
+ from annotator.mlsd import apply_mlsd
24
+ from annotator.openpose import apply_openpose
25
+ from annotator.uniformer import apply_uniformer
26
+ from annotator.util import HWC3, resize_image
27
+ from cldm.model import create_model, load_state_dict
28
+ from ldm.models.diffusion.ddim import DDIMSampler
29
+ from share import *
30
+
31
+
32
+ class Model:
33
+ WEIGHT_NAMES = {
34
+ 'canny': 'control_sd15_canny.pth',
35
+ 'hough': 'control_sd15_mlsd.pth',
36
+ 'hed': 'control_sd15_hed.pth',
37
+ 'scribble': 'control_sd15_scribble.pth',
38
+ 'pose': 'control_sd15_openpose.pth',
39
+ 'seg': 'control_sd15_seg.pth',
40
+ 'depth': 'control_sd15_depth.pth',
41
+ 'normal': 'control_sd15_normal.pth',
42
+ }
43
+
44
def __init__(self,
             model_config_path: str = 'ControlNet/models/cldm_v15.yaml',
             model_dir: str = 'models'):
    """Create the model and sampler, then fetch any missing checkpoints.

    Args:
        model_config_path: Path passed to ``create_model``.
        model_dir: Directory that holds (or will receive) the weight files.
    """
    # Use the first CUDA device when one is available, otherwise the CPU.
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_name)

    network = create_model(model_config_path)
    self.model = network.to(self.device)
    self.ddim_sampler = DDIMSampler(self.model)
    # Empty string means no task-specific weights have been loaded yet.
    self.task_name = ''

    self.model_dir = pathlib.Path(model_dir)
    self.download_models()
55
+
56
def load_weight(self, task_name: str) -> None:
    """Load the checkpoint for ``task_name`` unless it is already active.

    Does nothing when ``task_name`` equals ``self.task_name``; otherwise the
    weights are loaded into ``self.model`` and ``self.task_name`` is updated.
    """
    if self.task_name != task_name:
        checkpoint_path = self.get_weight_path(task_name)
        state = load_state_dict(checkpoint_path, location=self.device)
        self.model.load_state_dict(state)
        self.task_name = task_name
63
+
64
def get_weight_path(self, task_name: str) -> str:
    """Return the checkpoint path for ``task_name`` inside ``self.model_dir``.

    Any task name containing 'scribble' maps to the 'scribble' entry of
    ``WEIGHT_NAMES``. Raises ``KeyError`` for a name that is not in the table.
    """
    key = 'scribble' if 'scribble' in task_name else task_name
    filename = self.WEIGHT_NAMES[key]
    return f'{self.model_dir}/{filename}'
68
+
69
def download_models(self):
    """Download every checkpoint in ``WEIGHT_NAMES`` missing from ``model_dir``.

    Each file is fetched with ``wget`` into a temporary ``*.part`` file and
    renamed into place only when ``wget`` exits successfully. A failed
    download is reported on stderr and skipped, so the remaining checkpoints
    are still attempted and the next call retries the failed one.
    """
    self.model_dir.mkdir(exist_ok=True, parents=True)
    for name in self.WEIGHT_NAMES.values():
        out_path = self.model_dir / name
        if out_path.exists():
            continue

        url = f'https://huggingface.co/ckpt/ControlNet/resolve/main/{name}'
        # `wget -O` creates the output file even when the transfer fails.
        # Writing straight to out_path would leave a truncated checkpoint
        # that the exists() check above skips on every later run.
        tmp_path = out_path.with_name(out_path.name + '.part')
        # An argument list (not a split command string) keeps paths that
        # contain whitespace intact.
        completed = subprocess.run(['wget', url, '-O', str(tmp_path)])
        if completed.returncode == 0:
            tmp_path.replace(out_path)
            continue

        if tmp_path.exists():
            tmp_path.unlink()
        print(f'Failed to download {url} '
              f'(wget exit code {completed.returncode})',
              file=sys.stderr)
79
+
80
@torch.inference_mode()
def process_canny(self, input_image, prompt, a_prompt, n_prompt,
                  num_samples, image_resolution, ddim_steps, scale, seed,
                  eta, low_threshold, high_threshold):
    """Generate images conditioned on a Canny edge map of ``input_image``.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution passed to ``resize_image``.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].
        low_threshold, high_threshold: Passed to ``apply_canny``.

    Returns:
        A list holding the inverted edge map followed by ``num_samples``
        generated uint8 images.
    """
    self.load_weight('canny')

    img = resize_image(HWC3(input_image), image_resolution)
    H, W, _ = img.shape

    detected_map = HWC3(apply_canny(img, low_threshold, high_threshold))

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [255 - detected_map] + results
140
+
141
@torch.inference_mode()
def process_hough(self, input_image, prompt, a_prompt, n_prompt,
                  num_samples, image_resolution, detect_resolution,
                  ddim_steps, scale, seed, eta, value_threshold,
                  distance_threshold):
    """Generate images conditioned on an M-LSD line map of ``input_image``.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution of the generated images.
        detect_resolution: Resolution at which ``apply_mlsd`` is run.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].
        value_threshold, distance_threshold: Passed to ``apply_mlsd``.

    Returns:
        A list holding the dilated, inverted line map followed by
        ``num_samples`` generated uint8 images.
    """
    self.load_weight('hough')

    input_image = HWC3(input_image)
    detected_map = apply_mlsd(resize_image(input_image, detect_resolution),
                              value_threshold, distance_threshold)
    detected_map = HWC3(detected_map)
    img = resize_image(input_image, image_resolution)
    H, W, _ = img.shape

    # Bring the detection map to the generation resolution.
    detected_map = cv2.resize(detected_map, (W, H),
                              interpolation=cv2.INTER_NEAREST)

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    # The preview is the line map dilated with a 3x3 kernel, then inverted.
    preview = 255 - cv2.dilate(detected_map,
                               np.ones(shape=(3, 3), dtype=np.uint8),
                               iterations=1)
    return [preview] + results
210
+
211
@torch.inference_mode()
def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples,
                image_resolution, detect_resolution, ddim_steps, scale,
                seed, eta):
    """Generate images conditioned on a HED map of ``input_image``.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution of the generated images.
        detect_resolution: Resolution at which ``apply_hed`` is run.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the HED map followed by ``num_samples`` generated
        uint8 images.
    """
    self.load_weight('hed')

    input_image = HWC3(input_image)
    detected_map = apply_hed(resize_image(input_image, detect_resolution))
    detected_map = HWC3(detected_map)
    img = resize_image(input_image, image_resolution)
    H, W, _ = img.shape

    # Bring the detection map to the generation resolution.
    detected_map = cv2.resize(detected_map, (W, H),
                              interpolation=cv2.INTER_LINEAR)

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [detected_map] + results
274
+
275
@torch.inference_mode()
def process_scribble(self, input_image, prompt, a_prompt, n_prompt,
                     num_samples, image_resolution, ddim_steps, scale,
                     seed, eta):
    """Generate images conditioned on a scribble drawn in ``input_image``.

    Pixels whose minimum channel value is below 127 are treated as strokes.

    Args:
        input_image: Scribble image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution passed to ``resize_image``.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the inverted stroke map followed by ``num_samples``
        generated uint8 images.
    """
    self.load_weight('scribble')

    img = resize_image(HWC3(input_image), image_resolution)
    H, W, _ = img.shape

    # Binary stroke map: 255 where the drawing is dark, 0 elsewhere.
    detected_map = np.zeros_like(img, dtype=np.uint8)
    detected_map[np.min(img, axis=2) < 127] = 255

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [255 - detected_map] + results
335
+
336
@torch.inference_mode()
def process_scribble_interactive(self, input_image, prompt, a_prompt,
                                 n_prompt, num_samples, image_resolution,
                                 ddim_steps, scale, seed, eta):
    """Generate images conditioned on an interactively drawn scribble.

    The strokes are read from channel 0 of ``input_image['mask']``; pixels
    whose value is above 127 are treated as strokes.

    Args:
        input_image: Mapping with a ``'mask'`` array entry.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution passed to ``resize_image``.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the inverted stroke map followed by ``num_samples``
        generated uint8 images.
    """
    self.load_weight('scribble')

    mask_channel = input_image['mask'][:, :, 0]
    img = resize_image(HWC3(mask_channel), image_resolution)
    H, W, _ = img.shape

    # Binary stroke map: 255 where the mask is bright, 0 elsewhere.
    detected_map = np.zeros_like(img, dtype=np.uint8)
    detected_map[np.min(img, axis=2) > 127] = 255

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [255 - detected_map] + results
397
+
398
@torch.inference_mode()
def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt,
                          num_samples, image_resolution, detect_resolution,
                          ddim_steps, scale, seed, eta):
    """Generate images conditioned on a scribble derived from ``input_image``.

    The scribble is produced from the HED map by ``nms``, a Gaussian blur
    and a binarization step.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution of the generated images.
        detect_resolution: Resolution at which ``apply_hed`` is run.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the inverted scribble map followed by
        ``num_samples`` generated uint8 images.
    """
    self.load_weight('scribble')

    input_image = HWC3(input_image)
    detected_map = apply_hed(resize_image(input_image, detect_resolution))
    detected_map = HWC3(detected_map)
    img = resize_image(input_image, image_resolution)
    H, W, _ = img.shape

    # Bring the detection map to the generation resolution.
    detected_map = cv2.resize(detected_map, (W, H),
                              interpolation=cv2.INTER_LINEAR)
    detected_map = nms(detected_map, 127, 3.0)
    detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
    # Binarize: values above 4 become 255, everything else 0.
    detected_map[detected_map > 4] = 255
    detected_map[detected_map < 255] = 0

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [255 - detected_map] + results
465
+
466
@torch.inference_mode()
def process_pose(self, input_image, prompt, a_prompt, n_prompt,
                 num_samples, image_resolution, detect_resolution,
                 ddim_steps, scale, seed, eta):
    """Generate images conditioned on an OpenPose map of ``input_image``.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution of the generated images.
        detect_resolution: Resolution at which ``apply_openpose`` is run.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the pose map followed by ``num_samples`` generated
        uint8 images.
    """
    self.load_weight('pose')

    input_image = HWC3(input_image)
    # Only the first value returned by apply_openpose is used.
    detected_map, _ = apply_openpose(
        resize_image(input_image, detect_resolution))
    detected_map = HWC3(detected_map)
    img = resize_image(input_image, image_resolution)
    H, W, _ = img.shape

    # Bring the detection map to the generation resolution.
    detected_map = cv2.resize(detected_map, (W, H),
                              interpolation=cv2.INTER_NEAREST)

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [detected_map] + results
530
+
531
@torch.inference_mode()
def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples,
                image_resolution, detect_resolution, ddim_steps, scale,
                seed, eta):
    """Generate images conditioned on a segmentation map of ``input_image``.

    Args:
        input_image: Source image array.
        prompt, a_prompt: Joined as ``prompt + ', ' + a_prompt`` for the
            positive conditioning.
        n_prompt: Negative prompt for the unconditional branch.
        num_samples: Number of images to generate.
        image_resolution: Resolution of the generated images.
        detect_resolution: Resolution at which ``apply_uniformer`` is run.
        ddim_steps, scale, eta: DDIM steps, guidance scale and eta.
        seed: Random seed; -1 picks a random one in [0, 65535].

    Returns:
        A list holding the segmentation map followed by ``num_samples``
        generated uint8 images.
    """
    self.load_weight('seg')

    input_image = HWC3(input_image)
    detected_map = apply_uniformer(
        resize_image(input_image, detect_resolution))
    img = resize_image(input_image, image_resolution)
    H, W, _ = img.shape

    # Bring the detection map to the generation resolution.
    detected_map = cv2.resize(detected_map, (W, H),
                              interpolation=cv2.INTER_NEAREST)

    # Keep the control tensor on the same device as the model instead of
    # hard-coding .cuda(): __init__ selects self.device and may pick the CPU.
    control = torch.from_numpy(detected_map.copy()).float().to(
        self.device) / 255.0
    control = torch.stack([control for _ in range(num_samples)], dim=0)
    control = einops.rearrange(control, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    cond = {
        'c_concat': [control],
        'c_crossattn': [
            self.model.get_learned_conditioning(
                [prompt + ', ' + a_prompt] * num_samples)
        ]
    }
    un_cond = {
        'c_concat': [control],
        'c_crossattn':
        [self.model.get_learned_conditioning([n_prompt] * num_samples)]
    }
    # The sampler is given a shape of 1/8 of the image size with 4 channels.
    shape = (4, H // 8, W // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Map the decoded samples to uint8 HWC arrays in [0, 255].
    x_samples = self.model.decode_first_stage(samples)
    x_samples = (
        einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 +
        127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

    results = [x_samples[i] for i in range(num_samples)]
    return [detected_map] + results
594
+
595
@torch.inference_mode()
def process_depth(self, input_image, prompt, a_prompt, n_prompt,
                  num_samples, image_resolution, detect_resolution,
                  ddim_steps, scale, seed, eta):
    """Sample images conditioned on a depth map of ``input_image``.

    The 'depth' weights are loaded and ``apply_midas`` is run on the input
    resized with ``detect_resolution``; its first return value is used as
    the detected map. After ``HWC3`` the map is resized (bilinear) to the
    size of the input resized with ``image_resolution``, scaled by 1/255,
    and used as the control signal for DDIM sampling.

    Args:
        input_image: Source image; normalised through ``HWC3`` first.
        prompt: Main text prompt.
        a_prompt: Text appended to ``prompt`` after ``', '``.
        n_prompt: Prompt used for the unconditional branch.
        num_samples: Number of images to sample.
        image_resolution: Resolution passed to ``resize_image`` to fix the
            output size.
        detect_resolution: Resolution passed to ``resize_image`` before
            annotation.
        ddim_steps: Step count handed to the DDIM sampler.
        scale: Unconditional guidance scale.
        seed: Seed for ``seed_everything``; ``-1`` draws one from
            ``random.randint(0, 65535)``.
        eta: DDIM ``eta`` value.

    Returns:
        A list whose first item is the resized detected map, followed by
        ``num_samples`` decoded ``uint8`` images.
    """
    self.load_weight('depth')

    # Annotate at the detection resolution; keep only the first output.
    input_image = HWC3(input_image)
    annotator_input = resize_image(input_image, detect_resolution)
    detected_map, _ = apply_midas(annotator_input)
    detected_map = HWC3(detected_map)

    # Only the target size is taken from the generation-resolution resize.
    height, width, _ = resize_image(input_image, image_resolution).shape
    detected_map = cv2.resize(detected_map, (width, height),
                              interpolation=cv2.INTER_LINEAR)

    # Build the batched control tensor: repeat the map, move channels first.
    hint = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
    batched_hint = torch.stack([hint] * num_samples, dim=0)
    control = einops.rearrange(batched_hint, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    positive_embedding = self.model.get_learned_conditioning(
        [prompt + ', ' + a_prompt] * num_samples)
    cond = {'c_concat': [control], 'c_crossattn': [positive_embedding]}

    negative_embedding = self.model.get_learned_conditioning(
        [n_prompt] * num_samples)
    un_cond = {'c_concat': [control], 'c_crossattn': [negative_embedding]}

    latent_shape = (4, height // 8, width // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    # The second value returned by the sampler is not used here.
    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        latent_shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Decode, move channels last, then map to 0..255 uint8.
    decoded = self.model.decode_first_stage(samples)
    decoded = einops.rearrange(decoded, 'b c h w -> b h w c')
    pixels = (decoded * 127.5 + 127.5).cpu().numpy()
    pixels = pixels.clip(0, 255).astype(np.uint8)

    return [detected_map] + [pixels[i] for i in range(num_samples)]
659
+
660
@torch.inference_mode()
def process_normal(self, input_image, prompt, a_prompt, n_prompt,
                   num_samples, image_resolution, detect_resolution,
                   ddim_steps, scale, seed, eta, bg_threshold):
    """Sample images conditioned on a normal map of ``input_image``.

    The 'normal' weights are loaded and ``apply_midas`` is run on the input
    resized with ``detect_resolution`` (passing ``bg_threshold`` as
    ``bg_th``); its second return value is used as the detected map. After
    ``HWC3`` the map is resized (bilinear) to the size of the input resized
    with ``image_resolution``. The control tensor is built from the map
    with its last axis reversed, while the map returned to the caller is
    left in its original channel order.

    Args:
        input_image: Source image; normalised through ``HWC3`` first.
        prompt: Main text prompt.
        a_prompt: Text appended to ``prompt`` after ``', '``.
        n_prompt: Prompt used for the unconditional branch.
        num_samples: Number of images to sample.
        image_resolution: Resolution passed to ``resize_image`` to fix the
            output size.
        detect_resolution: Resolution passed to ``resize_image`` before
            annotation.
        ddim_steps: Step count handed to the DDIM sampler.
        scale: Unconditional guidance scale.
        seed: Seed for ``seed_everything``; ``-1`` draws one from
            ``random.randint(0, 65535)``.
        eta: DDIM ``eta`` value.
        bg_threshold: Forwarded to ``apply_midas`` as ``bg_th``.

    Returns:
        A list whose first item is the resized detected map, followed by
        ``num_samples`` decoded ``uint8`` images.
    """
    self.load_weight('normal')

    # Annotate at the detection resolution; keep only the second output.
    input_image = HWC3(input_image)
    annotator_input = resize_image(input_image, detect_resolution)
    _, detected_map = apply_midas(annotator_input, bg_th=bg_threshold)
    detected_map = HWC3(detected_map)

    # Only the target size is taken from the generation-resolution resize.
    height, width, _ = resize_image(input_image, image_resolution).shape
    detected_map = cv2.resize(detected_map, (width, height),
                              interpolation=cv2.INTER_LINEAR)

    # The model is fed the map with channels reversed on the last axis
    # (presumably an RGB/BGR swap - confirm against the annotator).
    flipped_map = detected_map[:, :, ::-1].copy()
    hint = torch.from_numpy(flipped_map).float().cuda() / 255.0
    batched_hint = torch.stack([hint] * num_samples, dim=0)
    control = einops.rearrange(batched_hint, 'b h w c -> b c h w').clone()

    if seed == -1:
        seed = random.randint(0, 65535)
    seed_everything(seed)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    positive_embedding = self.model.get_learned_conditioning(
        [prompt + ', ' + a_prompt] * num_samples)
    cond = {'c_concat': [control], 'c_crossattn': [positive_embedding]}

    negative_embedding = self.model.get_learned_conditioning(
        [n_prompt] * num_samples)
    un_cond = {'c_concat': [control], 'c_crossattn': [negative_embedding]}

    latent_shape = (4, height // 8, width // 8)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=True)

    # The second value returned by the sampler is not used here.
    samples, _ = self.ddim_sampler.sample(
        ddim_steps,
        num_samples,
        latent_shape,
        cond,
        verbose=False,
        eta=eta,
        unconditional_guidance_scale=scale,
        unconditional_conditioning=un_cond)

    if config.save_memory:
        self.model.low_vram_shift(is_diffusing=False)

    # Decode, move channels last, then map to 0..255 uint8.
    decoded = self.model.decode_first_stage(samples)
    decoded = einops.rearrange(decoded, 'b c h w -> b h w c')
    pixels = (decoded * 127.5 + 127.5).cpu().numpy()
    pixels = pixels.clip(0, 255).astype(np.uint8)

    return [detected_map] + [pixels[i] for i in range(num_samples)]
patch ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/annotator/hed/__init__.py b/annotator/hed/__init__.py
2
+ index 42d8dc6..1587035 100644
3
+ --- a/annotator/hed/__init__.py
4
+ +++ b/annotator/hed/__init__.py
5
+ @@ -1,8 +1,12 @@
6
+ +import pathlib
7
+ +
8
+ import numpy as np
9
+ import cv2
10
+ import torch
11
+ from einops import rearrange
12
+
13
+ +root_dir = pathlib.Path(__file__).parents[2]
14
+ +
15
+
16
+ class Network(torch.nn.Module):
17
+ def __init__(self):
18
+ @@ -64,7 +68,7 @@ class Network(torch.nn.Module):
19
+ torch.nn.Sigmoid()
20
+ )
21
+
22
+ - self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load('./annotator/ckpts/network-bsds500.pth').items()})
23
+ + self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(f'{root_dir}/annotator/ckpts/network-bsds500.pth').items()})
24
+ # end
25
+
26
+ def forward(self, tenInput):
27
+ diff --git a/annotator/midas/api.py b/annotator/midas/api.py
28
+ index 9fa305e..d8594ea 100644
29
+ --- a/annotator/midas/api.py
30
+ +++ b/annotator/midas/api.py
31
+ @@ -1,5 +1,7 @@
32
+ # based on https://github.com/isl-org/MiDaS
33
+
34
+ +import pathlib
35
+ +
36
+ import cv2
37
+ import torch
38
+ import torch.nn as nn
39
+ @@ -10,10 +12,11 @@ from .midas.midas_net import MidasNet
40
+ from .midas.midas_net_custom import MidasNet_small
41
+ from .midas.transforms import Resize, NormalizeImage, PrepareForNet
42
+
43
+ +root_dir = pathlib.Path(__file__).parents[2]
44
+
45
+ ISL_PATHS = {
46
+ - "dpt_large": "annotator/ckpts/dpt_large-midas-2f21e586.pt",
47
+ - "dpt_hybrid": "annotator/ckpts/dpt_hybrid-midas-501f0c75.pt",
48
+ + "dpt_large": f"{root_dir}/annotator/ckpts/dpt_large-midas-2f21e586.pt",
49
+ + "dpt_hybrid": f"{root_dir}/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt",
50
+ "midas_v21": "",
51
+ "midas_v21_small": "",
52
+ }
53
+ diff --git a/annotator/mlsd/__init__.py b/annotator/mlsd/__init__.py
54
+ index 75db717..f310fe6 100644
55
+ --- a/annotator/mlsd/__init__.py
56
+ +++ b/annotator/mlsd/__init__.py
57
+ @@ -1,3 +1,5 @@
58
+ +import pathlib
59
+ +
60
+ import cv2
61
+ import numpy as np
62
+ import torch
63
+ @@ -8,8 +10,9 @@ from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
64
+ from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
65
+ from .utils import pred_lines
66
+
67
+ +root_dir = pathlib.Path(__file__).parents[2]
68
+
69
+ -model_path = './annotator/ckpts/mlsd_large_512_fp32.pth'
70
+ +model_path = f'{root_dir}/annotator/ckpts/mlsd_large_512_fp32.pth'
71
+ model = MobileV2_MLSD_Large()
72
+ model.load_state_dict(torch.load(model_path), strict=True)
73
+ model = model.cuda().eval()
74
+ diff --git a/annotator/openpose/__init__.py b/annotator/openpose/__init__.py
75
+ index 47d50a5..2369eed 100644
76
+ --- a/annotator/openpose/__init__.py
77
+ +++ b/annotator/openpose/__init__.py
78
+ @@ -1,4 +1,5 @@
79
+ import os
80
+ +import pathlib
81
+ os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
82
+
83
+ import torch
84
+ @@ -7,8 +8,10 @@ from . import util
85
+ from .body import Body
86
+ from .hand import Hand
87
+
88
+ -body_estimation = Body('./annotator/ckpts/body_pose_model.pth')
89
+ -hand_estimation = Hand('./annotator/ckpts/hand_pose_model.pth')
90
+ +root_dir = pathlib.Path(__file__).parents[2]
91
+ +
92
+ +body_estimation = Body(f'{root_dir}/annotator/ckpts/body_pose_model.pth')
93
+ +hand_estimation = Hand(f'{root_dir}/annotator/ckpts/hand_pose_model.pth')
94
+
95
+
96
+ def apply_openpose(oriImg, hand=False):
97
+ diff --git a/annotator/uniformer/__init__.py b/annotator/uniformer/__init__.py
98
+ index 500e53c..4061dbe 100644
99
+ --- a/annotator/uniformer/__init__.py
100
+ +++ b/annotator/uniformer/__init__.py
101
+ @@ -1,9 +1,12 @@
102
+ +import pathlib
103
+ +
104
+ from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
105
+ from annotator.uniformer.mmseg.core.evaluation import get_palette
106
+
107
+ +root_dir = pathlib.Path(__file__).parents[2]
108
+
109
+ -checkpoint_file = "annotator/ckpts/upernet_global_small.pth"
110
+ -config_file = 'annotator/uniformer/exp/upernet_global_small/config.py'
111
+ +checkpoint_file = f"{root_dir}/annotator/ckpts/upernet_global_small.pth"
112
+ +config_file = f'{root_dir}/annotator/uniformer/exp/upernet_global_small/config.py'
113
+ model = init_segmentor(config_file, checkpoint_file).cuda()
114
+
115
+
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ addict==2.4.0
2
+ albumentations==1.3.0
3
+ einops==0.6.0
4
+ gradio==3.18.0
5
+ imageio==2.25.0
6
+ imageio-ffmpeg==0.4.8
7
+ kornia==0.6.9
8
+ omegaconf==2.3.0
9
+ open-clip-torch==2.13.0
10
+ opencv-contrib-python==4.7.0.68
11
+ opencv-python-headless==4.7.0.68
12
+ prettytable==3.6.0
13
+ pytorch-lightning==1.9.0
14
+ timm==0.6.12
15
+ torch==1.13.1
16
+ torchvision==0.14.1
17
+ transformers==4.26.1
18
+ yapf==0.32.0
style.css ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ }