mbrack committed on
Commit
2bb6dee
1 Parent(s): 20a9dda

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +346 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from contextlib import nullcontext
import gradio as gr
import torch
from torch import autocast
from diffusers import SemanticStableDiffusionPipeline

# Run on GPU when available; the pipeline also works (slowly) on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# BUG FIX: the original id "/runwayml/stable-diffusion-v1-5/" has leading and
# trailing slashes, which makes from_pretrained treat it as a local filesystem
# path instead of the Hugging Face Hub repo id.
pipe = SemanticStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to(device)

# Single generator reused for both the baseline and the edited generation so
# they can be re-seeded to the same latent.
gen = torch.Generator(device=device)

# Sometimes the nsfw checker is confused by the Pokémon images, you can disable
# it at your own risk here
disable_safety = False

if disable_safety:
    def null_safety(images, **kwargs):
        # Stand-in safety checker that never flags anything.
        # NOTE(review): diffusers safety checkers conventionally return
        # (images, list_of_bools); returning a bare False here relies on the
        # pipeline not iterating the flag — confirm against the pinned
        # diffusers fork before enabling.
        return images, False
    pipe.safety_checker = null_safety
23
def infer(prompt, steps, scale, seed, editing_prompt_1 = None, reverse_editing_direction_1 = False, edit_warmup_steps_1=10, edit_guidance_scale_1=5, edit_threshold_1=0.95,
          editing_prompt_2 = None, reverse_editing_direction_2 = False, edit_warmup_steps_2=10, edit_guidance_scale_2=5, edit_threshold_2=0.95,
          edit_momentum_scale=0.5, edit_mom_beta=0.6):
    """Generate a baseline image and a semantically edited image for *prompt*.

    Returns a list with two images: the plain Stable Diffusion result and the
    result with the (up to two) semantic edit prompts applied, both produced
    from the same seed so they are directly comparable.
    """
    # Baseline generation without any semantic guidance.
    gen.manual_seed(seed)
    images = pipe(prompt, guidance_scale=scale, num_inference_steps=steps, generator=gen).images

    # Collect per-edit parameters as parallel lists.
    editing_prompt = [editing_prompt_1, editing_prompt_2]
    reverse_editing_direction = [reverse_editing_direction_1, reverse_editing_direction_2]
    edit_warmup_steps = [edit_warmup_steps_1, edit_warmup_steps_2]
    edit_guidance_scale = [edit_guidance_scale_1, edit_guidance_scale_2]
    edit_threshold = [edit_threshold_1, edit_threshold_2]

    # Drop edit slots whose prompt is missing or effectively empty, keeping the
    # parallel lists aligned (delete from the end so indices stay valid).
    indices = [ind for ind, val in enumerate(editing_prompt) if val is None or len(val) <= 1]
    for index in sorted(indices, reverse=True):
        del editing_prompt[index]
        del reverse_editing_direction[index]
        del edit_warmup_steps[index]
        del edit_guidance_scale[index]
        del edit_threshold[index]

    # Re-seed so the edited image starts from the same latent as the baseline.
    gen.manual_seed(seed)
    # BUG FIX: edit_threshold was built and filtered above but never forwarded,
    # so the Threshold sliders had no effect on the generation.
    images.extend(pipe(prompt, guidance_scale=scale, num_inference_steps=steps, generator=gen,
                       editing_prompt=editing_prompt, reverse_editing_direction=reverse_editing_direction,
                       edit_warmup_steps=edit_warmup_steps, edit_guidance_scale=edit_guidance_scale,
                       edit_threshold=edit_threshold,
                       edit_momentum_scale=edit_momentum_scale, edit_mom_beta=edit_mom_beta
                       ).images)

    return images
54
+
55
# Custom stylesheet for the Gradio app (purple accent, centered layout,
# gallery sizing, footer). Injected via gr.Blocks(css=css) below.
css = """
a {
    color: inherit;
    text-decoration: underline;
}
.gradio-container {
    font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
    color: white;
    border-color: #9d66e5;
    background: #9d66e5;
}
input[type='range'] {
    accent-color: #9d66e5;
}
.dark input[type='range'] {
    accent-color: #dfdfdf;
}
.container {
    max-width: 730px;
    margin: auto;
    padding-top: 1.5rem;
}
#gallery {
    min-height: 22rem;
    margin-bottom: 15px;
    margin-left: auto;
    margin-right: auto;
    border-bottom-right-radius: .5rem !important;
    border-bottom-left-radius: .5rem !important;
}
#gallery>div>.h-full {
    min-height: 20rem;
}
.details:hover {
    text-decoration: underline;
}
.gr-button {
    white-space: nowrap;
}
.gr-button:focus {
    border-color: rgb(147 197 253 / var(--tw-border-opacity));
    outline: none;
    box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
    --tw-border-opacity: 1;
    --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
    --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
    --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
    --tw-ring-opacity: .5;
}
#advanced-options {
    margin-bottom: 20px;
}
.footer {
    margin-bottom: 45px;
    margin-top: 35px;
    text-align: center;
    border-bottom: 1px solid #e5e5e5;
}
.footer>p {
    font-size: .8rem;
    display: inline-block;
    padding: 0 10px;
    transform: translateY(10px);
    background: white;
}

.dark .footer {
    border-color: #303030;
}
.dark .footer>p {
    background: #0b0f19;
}
.acknowledgments h4{
    margin: 1.25em 0 .25em 0;
    font-weight: bold;
    font-size: 115%;
}
"""
135
+
136
# App container; the stylesheet defined above is applied to the whole UI.
block = gr.Blocks(css=css)

# Each example row mirrors infer()'s positional signature:
#   (prompt, steps, scale, seed,
#    edit prompt 1, reverse 1, warmup 1, guidance scale 1, threshold 1,
#    edit prompt 2, reverse 2, warmup 2, guidance scale 2, threshold 2)
examples = [
    ['a photo of a cat', 50, 7, 3,
     'sunglasses', False, 10, 6, 0.95,
     '', False, 10, 5, 0.95],
    ['an image of a crowded boulevard, realistic, 4k', 50, 7, 9,
     'crowd, crowded, people', True, 10, 8.3, 0.9,
     '', False, 10, 5, 0.95],
    ['a castle next to a river', 50, 7, 48,
     'boat on a river', False, 15, 6, 0.9,
     'monet, impression, sunrise', False, 18, 6, 0.8],
    ['a portrait of a king, full body shot, 8k', 50, 7, 33,
     'male', True, 5, 5, 0.9,
     'female', False, 5, 5, 0.9],
    ['a photo of a flowerpot', 50, 7, 2,
     'glasses', False, 12, 5, 0.975,
     '', False, 10, 5, 0.95],
    ['a photo of the face of a woman', 50, 7, 21,
     'smiling, smile', False, 15, 3, 0.99,
     'curls, wavy hair, curly hair', False, 13, 3, 0.925],
]
236
+
237
# UI definition. NOTE(review): the original file's indentation was lost in
# transit; the container nesting below was reconstructed from the element
# order — confirm it matches the intended layout.
with block:
    # Header: logo, title and a short description linking to the SEGA paper.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 750px; margin: 0 auto;">
          <div>
            <img class="logo" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1666181274838-62fa1d95e8c9c532aa75331c.png" alt="AIML Logo"
                 style="margin: auto; max-width: 7rem;">
            <h1 style="font-weight: 900; font-size: 3rem;">
              Semantic Guidance for Diffusion
            </h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%">
            Interact with semantic concepts during the diffusion process. Details can be found in the paper <a href="https://arxiv.org/abs/2301.12247" style="text-decoration: underline;" target="_blank">SEGA: Instructing Diffusion using Semantic Dimensions</a>. <br/> Simply use the edit prompts to make arbitrary changes to the generation.
          </p>
        </div>
        """
    )
    with gr.Group():
        # Main prompt plus the generate button.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                text = gr.Textbox(
                    label="Enter your prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
                btn = gr.Button("Generate image").style(
                    margin=False,
                    rounded=(False, True, True, False),
                )
        # Two semantic edit slots, each with its own guidance controls.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                edit_1 = gr.Textbox(
                    label="Edit Prompt 1",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your 1st edit prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
            with gr.Group():
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                    rev_1 = gr.Checkbox(label='Reverse')
                    warmup_1 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
                    scale_1 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
                    # BUG FIX: gr.Slider's keyword is `step`, not `steps`; the
                    # original `steps=0.01` was not a valid parameter, so the
                    # threshold slider had no step size.
                    threshold_1 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                edit_2 = gr.Textbox(
                    label="Edit Prompt 2",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your 2nd edit prompt",
                ).style(
                    border=(True, False, True, True),
                    rounded=(True, False, False, True),
                    container=False,
                )
            with gr.Group():
                with gr.Row().style(mobile_collapse=False, equal_height=True):
                    rev_2 = gr.Checkbox(label='Reverse')
                    warmup_2 = gr.Slider(label='Warmup', minimum=0, maximum=50, value=10, step=1, interactive=True)
                    scale_2 = gr.Slider(label='Scale', minimum=1, maximum=10, value=5, step=0.25, interactive=True)
                    # BUG FIX: `steps` -> `step` (see threshold_1 above).
                    threshold_2 = gr.Slider(label='Threshold', minimum=0.5, maximum=0.99, value=0.95, step=0.01, interactive=True)

    # Output gallery: baseline image and edited image side by side.
    gallery = gr.Gallery(
        label="Generated images", show_label=False, elem_id="gallery"
    ).style(grid=[2], height="auto")

    with gr.Row(elem_id="advanced-options"):
        scale = gr.Slider(label="Scale", minimum=3, maximum=15, value=7, step=1)
        steps = gr.Slider(label="Steps", minimum=5, maximum=50, value=50, step=5, interactive=False)
        seed = gr.Slider(
            label="Seed",
            minimum=0,
            maximum=2147483647,
            step=1,
            #randomize=True,
        )

    # Single source of truth for the input ordering shared by examples,
    # textbox submit and button click — must match infer()'s signature.
    infer_inputs = [text, steps, scale, seed,
                    edit_1, rev_1, warmup_1, scale_1, threshold_1,
                    edit_2, rev_2, warmup_2, scale_2, threshold_2]

    ex = gr.Examples(examples=examples, fn=infer, inputs=infer_inputs, outputs=gallery, cache_examples=False)
    ex.dataset.headers = [""]

    text.submit(infer, inputs=infer_inputs, outputs=gallery)
    btn.click(infer, inputs=infer_inputs, outputs=gallery)
    # NOTE(review): the href "justinpinkney.com" looks wrong for Patrick
    # Schramowski (it points to an unrelated personal site) — confirm the
    # intended URL.
    gr.HTML(
        """
        <div class="footer">
            <p> Gradio Demo by AIML@TU Darmstadt and 🤗 Hugging Face
            </p>
        </div>
        <div class="acknowledgments">
            <p>Created by <a href="https://www.aiml.informatik.tu-darmstadt.de/people/mbrack/">Manuel Brack</a> and <a href="justinpinkney.com">Patrick Schramowski</a> at <a href="https://www.aiml.informatik.tu-darmstadt.de">AIML Lab</a>.</p>
        </div>
        """
    )

block.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ git+https://github.com/ml-research/diffusers.git@semantic-guidance
2
+ ftfy