Initial commit
- app.py +346 -0
- requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,346 @@
from contextlib import nullcontext
import gradio as gr
import torch
from torch import autocast
from diffusers import SemanticStableDiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = SemanticStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to(device)
gen = torch.Generator(device=device)

# Sometimes the nsfw checker is confused by the generated images; you can disable
# it at your own risk here
disable_safety = False

if disable_safety:
    def null_safety(images, **kwargs):
        return images, False
    pipe.safety_checker = null_safety


def infer(prompt, steps, scale, seed,
          editing_prompt_1=None, reverse_editing_direction_1=False, edit_warmup_steps_1=10, edit_guidance_scale_1=5, edit_threshold_1=0.95,
          editing_prompt_2=None, reverse_editing_direction_2=False, edit_warmup_steps_2=10, edit_guidance_scale_2=5, edit_threshold_2=0.95,
          edit_momentum_scale=0.5, edit_mom_beta=0.6):

    # First pass: plain generation without semantic guidance, for comparison.
    gen.manual_seed(seed)
    images = pipe(prompt, guidance_scale=scale, num_inference_steps=steps, generator=gen).images

    editing_prompt = [editing_prompt_1, editing_prompt_2]
    reverse_editing_direction = [reverse_editing_direction_1, reverse_editing_direction_2]
    edit_warmup_steps = [edit_warmup_steps_1, edit_warmup_steps_2]
    edit_guidance_scale = [edit_guidance_scale_1, edit_guidance_scale_2]
    edit_threshold = [edit_threshold_1, edit_threshold_2]

    # Drop edit slots that were left empty.
    indices = [ind for ind, val in enumerate(editing_prompt) if val is None or len(val) <= 1]
    for index in sorted(indices, reverse=True):
        del editing_prompt[index]
        del reverse_editing_direction[index]
        del edit_warmup_steps[index]
        del edit_guidance_scale[index]
        del edit_threshold[index]

    # Second pass: same seed, this time guided by the remaining edit prompts.
    gen.manual_seed(seed)
    images.extend(pipe(prompt, guidance_scale=scale, num_inference_steps=steps, generator=gen,
                       editing_prompt=editing_prompt, reverse_editing_direction=reverse_editing_direction,
                       edit_warmup_steps=edit_warmup_steps, edit_guidance_scale=edit_guidance_scale,
                       edit_threshold=edit_threshold,
                       edit_momentum_scale=edit_momentum_scale, edit_mom_beta=edit_mom_beta
                       ).images)

    return images

css = """
        a {
            color: inherit;
            text-decoration: underline;
        }
        .gradio-container {
            font-family: 'IBM Plex Sans', sans-serif;
        }
        .gr-button {
            color: white;
            border-color: #9d66e5;
            background: #9d66e5;
        }
        input[type='range'] {
            accent-color: #9d66e5;
        }
        .dark input[type='range'] {
            accent-color: #dfdfdf;
        }
        .container {
            max-width: 730px;
            margin: auto;
            padding-top: 1.5rem;
        }
        #gallery {
            min-height: 22rem;
            margin-bottom: 15px;
            margin-left: auto;
            margin-right: auto;
            border-bottom-right-radius: .5rem !important;
            border-bottom-left-radius: .5rem !important;
        }
        #gallery>div>.h-full {
            min-height: 20rem;
        }
        .details:hover {
            text-decoration: underline;
        }
        .gr-button {
            white-space: nowrap;
        }
        .gr-button:focus {
            border-color: rgb(147 197 253 / var(--tw-border-opacity));
            outline: none;
            box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
            --tw-border-opacity: 1;
            --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
            --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
            --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
            --tw-ring-opacity: .5;
        }
        #advanced-options {
            margin-bottom: 20px;
        }
        .footer {
            margin-bottom: 45px;
            margin-top: 35px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }

        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
        .acknowledgments h4{
            margin: 1.25em 0 .25em 0;
            font-weight: bold;
            font-size: 115%;
        }
"""

block = gr.Blocks(css=css)

# Each example row maps onto the inputs of infer():
#   prompt, steps, scale, seed,
#   edit prompt 1, reverse 1, warmup 1, edit scale 1, threshold 1,
#   edit prompt 2, reverse 2, warmup 2, edit scale 2, threshold 2
examples = [
    ['a photo of a cat', 50, 7, 3,
     'sunglasses', False, 10, 6, 0.95,
     '', False, 10, 5, 0.95],
    ['an image of a crowded boulevard, realistic, 4k', 50, 7, 9,
     'crowd, crowded, people', True, 10, 8.3, 0.9,
     '', False, 10, 5, 0.95],
    ['a castle next to a river', 50, 7, 48,
     'boat on a river', False, 15, 6, 0.9,
     'monet, impression, sunrise', False, 18, 6, 0.8],
    ['a portrait of a king, full body shot, 8k', 50, 7, 33,
     'male', True, 5, 5, 0.9,
     'female', False, 5, 5, 0.9],
    ['a photo of a flowerpot', 50, 7, 2,
     'glasses', False, 12, 5, 0.975,
     '', False, 10, 5, 0.95],
    ['a photo of the face of a woman', 50, 7, 21,
     'smiling, smile', False, 15, 3, 0.99,
     'curls, wavy hair, curly hair', False, 13, 3, 0.925],
]

with block:
    gr.HTML(
        """
            <div style="text-align: center; max-width: 750px; margin: 0 auto;">
              <div>
                <img class="logo" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1666181274838-62fa1d95e8c9c532aa75331c.png" alt="AIML Logo"
                    style="margin: auto; max-width: 7rem;">
                <h1 style="font-weight: 900; font-size: 3rem;">
                  Semantic Guidance for Diffusion
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%">
                Interact with semantic concepts during the diffusion process. Details can be found in the paper <a href="https://arxiv.org/abs/2301.12247" style="text-decoration: underline;" target="_blank">SEGA: Instructing Diffusion using Semantic Dimensions</a>. <br/> Simply use the edit prompts to make arbitrary changes to the generation.
              </p>
            </div>
        """
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                text = gr.Textbox(
                    label="Enter your prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
                btn = gr.Button("Generate image").style(
                    margin=False,
                    rounded=(False, True, True, False),
                )
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                edit_1 = gr.Textbox(
                    label="Edit Prompt 1",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your 1st edit prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
            with gr.Group():
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                    rev_1 = gr.Checkbox(label='Reverse')
                    warmup_1 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
                    scale_1 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
                    threshold_1 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                edit_2 = gr.Textbox(
                    label="Edit Prompt 2",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your 2nd edit prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
            with gr.Group():
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                    rev_2 = gr.Checkbox(label='Reverse')
                    warmup_2 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
                    scale_2 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
                    threshold_2 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)

        gallery = gr.Gallery(
            label="Generated images", show_label=False, elem_id="gallery"
        ).style(grid=[2], height="auto")

    with gr.Row(elem_id="advanced-options"):
        scale = gr.Slider(label="Scale", minimum=3, maximum=15, value=7, step=1)
        steps = gr.Slider(label="Steps", minimum=5, maximum=50, value=50, step=5, interactive=False)
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=2147483647,
            step=1,
            # randomize=True,
        )

    ex = gr.Examples(examples=examples, fn=infer,
                     inputs=[text, steps, scale, seed, edit_1, rev_1, warmup_1, scale_1, threshold_1, edit_2, rev_2, warmup_2, scale_2, threshold_2],
                     outputs=gallery, cache_examples=False)
    ex.dataset.headers = [""]

    text.submit(infer, inputs=[text, steps, scale, seed, edit_1, rev_1, warmup_1, scale_1, threshold_1, edit_2, rev_2, warmup_2, scale_2, threshold_2], outputs=gallery)
    btn.click(infer, inputs=[text, steps, scale, seed, edit_1, rev_1, warmup_1, scale_1, threshold_1, edit_2, rev_2, warmup_2, scale_2, threshold_2], outputs=gallery)

    gr.HTML(
        """
            <div class="footer">
                <p> Gradio Demo by AIML@TU Darmstadt and 🤗 Hugging Face
                </p>
            </div>
            <div class="acknowledgments">
                <p>Created by <a href="https://www.aiml.informatik.tu-darmstadt.de/people/mbrack/">Manuel Brack</a> and <a href="justinpinkney.com">Patrick Schramowski</a> at <a href="https://www.aiml.informatik.tu-darmstadt.de">AIML Lab</a>.</p>
            </div>
        """
    )

block.launch()
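For reference, the two-pass comparison that infer() runs for the Space can be reproduced outside Gradio. The snippet below is a minimal sketch, not part of the commit: it assumes the semantic-guidance diffusers fork from requirements.txt is installed and that the runwayml/stable-diffusion-v1-5 weights can be fetched; the parameter values are copied from the first example row above ('a photo of a cat' with the edit concept 'sunglasses').

    # Standalone sketch of what infer() does for the first example (assumption:
    # the ml-research semantic-guidance fork of diffusers is installed).
    import torch
    from diffusers import SemanticStableDiffusionPipeline

    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = SemanticStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
    gen = torch.Generator(device=device).manual_seed(3)

    # Plain generation for comparison.
    baseline = pipe("a photo of a cat", guidance_scale=7, num_inference_steps=50, generator=gen).images[0]

    # Same seed, now guided towards the edit concept "sunglasses".
    gen.manual_seed(3)
    edited = pipe("a photo of a cat", guidance_scale=7, num_inference_steps=50, generator=gen,
                  editing_prompt=["sunglasses"], reverse_editing_direction=[False],
                  edit_warmup_steps=[10], edit_guidance_scale=[6], edit_threshold=[0.95],
                  edit_momentum_scale=0.5, edit_mom_beta=0.6).images[0]

    baseline.save("cat.png")
    edited.save("cat_sunglasses.png")

Reusing the same seed for both calls means the only difference between the two images is the semantic guidance, which is exactly how the demo lets you compare edited and unedited generations side by side.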
requirements.txt ADDED
@@ -0,0 +1,2 @@
git+https://github.com/ml-research/diffusers.git@semantic-guidance
ftfy
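The pinned fork is what is expected to provide SemanticStableDiffusionPipeline and the edit_* keyword arguments that app.py passes to the pipeline. A hypothetical post-install sanity check (assuming the fork installs under the usual diffusers package name) could look like this:

    # Hypothetical check, not part of the Space: confirm the installed diffusers
    # build exposes the semantic-guidance call signature that app.py relies on.
    import inspect
    from diffusers import SemanticStableDiffusionPipeline

    sig = inspect.signature(SemanticStableDiffusionPipeline.__call__)
    expected = ["editing_prompt", "reverse_editing_direction", "edit_warmup_steps",
                "edit_guidance_scale", "edit_threshold", "edit_momentum_scale", "edit_mom_beta"]
    missing = [name for name in expected if name not in sig.parameters]
    print("missing keyword arguments:", missing or "none")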