Shuang59 committed
Commit 6048c1c • 1 Parent(s): 7143c43

Update app.py

Files changed (1)
  1. app.py +397 -29
app.py CHANGED
@@ -7,21 +7,72 @@ Original file is located at
      https://colab.research.google.com/drive/19xx6Nu4FeiGj-TzTUFxBf-15IkeuFx_F
  """
 
- import streamlit as st
  import gradio as gr
  import torch as th
 
  from composable_diffusion.download import download_model
  from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
  from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
- from composable_diffusion.composable_stable_diffusion.pipeline_composable_stable_diffusion import ComposableStableDiffusionPipeline
+ from composable_diffusion.composable_stable_diffusion.pipeline_composable_stable_diffusion import \
+     ComposableStableDiffusionPipeline
 
- # This notebook supports both CPU and GPU.
- # On CPU, generating one sample may take on the order of 20 minutes.
- # On a GPU, it should be under a minute.
+ import os
+ import shutil
+ import time
+ import glob
+ import numpy as np
+ import open3d as o3d
+ import open3d.visualization.rendering as rendering
+
+ from PIL import Image
+ from tqdm.auto import tqdm
+ from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config
+ from point_e.diffusion.sampler import PointCloudSampler
+ from point_e.models.download import load_checkpoint
+ from point_e.models.configs import MODEL_CONFIGS, model_from_config
+ from point_e.util.pc_to_mesh import marching_cubes_mesh
 
  has_cuda = th.cuda.is_available()
  device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
+ print(has_cuda)
+
+ # init stable diffusion model
+ pipe = ComposableStableDiffusionPipeline.from_pretrained(
+     "CompVis/stable-diffusion-v1-4",
+ ).to(device)
+
+ pipe.safety_checker = None
+
+ # create model for CLEVR Objects
+ clevr_options = model_and_diffusion_defaults_for_clevr()
+
+ flags = {
+     "image_size": 128,
+     "num_channels": 192,
+     "num_res_blocks": 2,
+     "learn_sigma": True,
+     "use_scale_shift_norm": False,
+     "raw_unet": True,
+     "noise_schedule": "squaredcos_cap_v2",
+     "rescale_learned_sigmas": False,
+     "rescale_timesteps": False,
+     "num_classes": '2',
+     "dataset": "clevr_pos",
+     "use_fp16": has_cuda,
+     "timestep_respacing": '100'
+ }
+
+ for key, val in flags.items():
+     clevr_options[key] = val
+
+ clevr_model, clevr_diffusion = create_model_and_diffusion_for_clevr(**clevr_options)
+ clevr_model.eval()
+ if has_cuda:
+     clevr_model.convert_to_fp16()
+
+ clevr_model.to(device)
+ clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
+ device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
 
  # init stable diffusion model
  pipe = ComposableStableDiffusionPipeline.from_pretrained(
@@ -61,6 +112,245 @@ clevr_model.to(device)
  clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
  print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
 
+ print('creating base model...')
+ base_name = 'base40M-textvec'
+ base_model = model_from_config(MODEL_CONFIGS[base_name], device)
+ base_model.eval()
+ base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[base_name])
+
+ print('creating upsample model...')
+ upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
+ upsampler_model.eval()
+ upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])
+
+ print('downloading base checkpoint...')
+ base_model.load_state_dict(load_checkpoint(base_name, device))
+
+ print('downloading upsampler checkpoint...')
+ upsampler_model.load_state_dict(load_checkpoint('upsample', device))
+
+ print('creating SDF model...')
+ name = 'sdf'
+ model = model_from_config(MODEL_CONFIGS[name], device)
+ model.eval()
+
+ print('loading SDF model...')
+ model.load_state_dict(load_checkpoint(name, device))
+
+
+ def compose_pointe(prompt, weights):
+     weight_list = [float(x.strip()) for x in weights.split('|')]
+     sampler = PointCloudSampler(
+         device=device,
+         models=[base_model, upsampler_model],
+         diffusions=[base_diffusion, upsampler_diffusion],
+         num_points=[1024, 4096 - 1024],
+         aux_channels=['R', 'G', 'B'],
+         guidance_scale=[weight_list, 0.0],
+         model_kwargs_key_filter=('texts', ''),  # Do not condition the upsampler at all
+     )
+
+     def generate_pcd(prompt_list):
+         # Produce a sample from the model.
+         samples = None
+         for x in tqdm(sampler.sample_batch_progressive(batch_size=1, model_kwargs=dict(texts=prompt_list))):
+             samples = x
+         return samples
+
+     def generate_fig(samples):
+         pc = sampler.output_to_point_clouds(samples)[0]
+         return pc
+
+
+ # has_cuda = th.cuda.is_available()
+ device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
+
+ # init stable diffusion model
+ pipe = ComposableStableDiffusionPipeline.from_pretrained(
+     "CompVis/stable-diffusion-v1-4",
+ ).to(device)
+
+ pipe.safety_checker = None
+
+ # create model for CLEVR Objects
+ clevr_options = model_and_diffusion_defaults_for_clevr()
+
+ flags = {
+     "image_size": 128,
+     "num_channels": 192,
+     "num_res_blocks": 2,
+     "learn_sigma": True,
+     "use_scale_shift_norm": False,
+     "raw_unet": True,
+     "noise_schedule": "squaredcos_cap_v2",
+     "rescale_learned_sigmas": False,
+     "rescale_timesteps": False,
+     "num_classes": '2',
+     "dataset": "clevr_pos",
+     "use_fp16": has_cuda,
+     "timestep_respacing": '100'
+ }
+
+ for key, val in flags.items():
+     clevr_options[key] = val
+
+ clevr_model, clevr_diffusion = create_model_and_diffusion_for_clevr(**clevr_options)
+ clevr_model.eval()
+ if has_cuda:
+     clevr_model.convert_to_fp16()
+
+ clevr_model.to(device)
+ clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
+ device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
+
+ # init stable diffusion model
+ pipe = ComposableStableDiffusionPipeline.from_pretrained(
+     "CompVis/stable-diffusion-v1-4",
+ ).to(device)
+
+ pipe.safety_checker = None
+
+ # create model for CLEVR Objects
+ clevr_options = model_and_diffusion_defaults_for_clevr()
+
+ flags = {
+     "image_size": 128,
+     "num_channels": 192,
+     "num_res_blocks": 2,
+     "learn_sigma": True,
+     "use_scale_shift_norm": False,
+     "raw_unet": True,
+     "noise_schedule": "squaredcos_cap_v2",
+     "rescale_learned_sigmas": False,
+     "rescale_timesteps": False,
+     "num_classes": '2',
+     "dataset": "clevr_pos",
+     "use_fp16": has_cuda,
+     "timestep_respacing": '100'
+ }
+
+ for key, val in flags.items():
+     clevr_options[key] = val
+
+ clevr_model, clevr_diffusion = create_model_and_diffusion_for_clevr(**clevr_options)
+ clevr_model.eval()
+ if has_cuda:
+     clevr_model.convert_to_fp16()
+
+ clevr_model.to(device)
+ clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
+ print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
+
+ print('creating base model...')
+ base_name = 'base40M-textvec'
+ base_model = model_from_config(MODEL_CONFIGS[base_name], device)
+ base_model.eval()
+ base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[base_name])
+
+ print('creating upsample model...')
+ upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)
+ upsampler_model.eval()
+ upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])
+
+ print('downloading base checkpoint...')
+ base_model.load_state_dict(load_checkpoint(base_name, device))
+
+ print('downloading upsampler checkpoint...')
+ upsampler_model.load_state_dict(load_checkpoint('upsample', device))
+
+ print('creating SDF model...')
+ name = 'sdf'
+ model = model_from_config(MODEL_CONFIGS[name], device)
+ model.eval()
+
+ print('loading SDF model...')
+ model.load_state_dict(load_checkpoint(name, device))
+
+
+ def compose_pointe(prompt, weights, version):
+     weight_list = [float(x.strip()) for x in weights.split('|')]
+     sampler = PointCloudSampler(
+         device=device,
+         models=[base_model, upsampler_model],
+         diffusions=[base_diffusion, upsampler_diffusion],
+         num_points=[1024, 4096 - 1024],
+         aux_channels=['R', 'G', 'B'],
+         guidance_scale=[weight_list, 0.0],
+         model_kwargs_key_filter=('texts', ''),  # Do not condition the upsampler at all
+     )
+
+     def generate_pcd(prompt_list):
+         # Produce a sample from the model.
+         samples = None
+         for x in tqdm(sampler.sample_batch_progressive(batch_size=1, model_kwargs=dict(texts=prompt_list))):
+             samples = x
+         return samples
+
+     def generate_fig(samples):
+         pc = sampler.output_to_point_clouds(samples)[0]
+         return pc
+
+     def generate_mesh(pc):
+         mesh = marching_cubes_mesh(
+             pc=pc,
+             model=model,
+             batch_size=4096,
+             grid_size=128,  # increase to 128 for resolution used in evals
+             progress=True,
+         )
+         return mesh
+
+     def generate_video(mesh_path):
+         render = rendering.OffscreenRenderer(640, 480)
+         mesh = o3d.io.read_triangle_mesh(mesh_path)
+         mesh.compute_vertex_normals()
+
+         mat = o3d.visualization.rendering.MaterialRecord()
+         mat.shader = 'defaultLit'
+
+         render.scene.camera.look_at([0, 0, 0], [1, 1, 1], [0, 0, 1])
+         render.scene.add_geometry('mesh', mesh, mat)
+
+         timestr = time.strftime("%Y%m%d-%H%M%S")
+         os.makedirs(timestr, exist_ok=True)
+
+         def update_geometry():
+             render.scene.clear_geometry()
+             render.scene.add_geometry('mesh', mesh, mat)
+
+         def generate_images():
+             for i in range(64):
+                 # Rotation
+                 R = mesh.get_rotation_matrix_from_xyz((0, 0, np.pi / 32))
+                 mesh.rotate(R, center=(0, 0, 0))
+                 # Update geometry
+                 update_geometry()
+                 img = render.render_to_image()
+                 o3d.io.write_image(os.path.join(timestr + "/{:05d}.jpg".format(i)), img, quality=100)
+                 time.sleep(0.05)
+
+         generate_images()
+         image_list = []
+         for filename in sorted(glob.glob(f'{timestr}/*.jpg')):  # assuming gif
+             im = Image.open(filename)
+             image_list.append(im)
+         # remove the folder
+         shutil.rmtree(timestr)
+         return image_list
+
+     prompt_list = [x.strip() for x in prompt.split("|")]
+     pcd = generate_pcd(prompt_list)
+     pc = generate_fig(pcd)
+     mesh = generate_mesh(pc)
+     timestr = time.strftime("%Y%m%d-%H%M%S")
+     mesh_path = os.path.join(f'{timestr}.ply')
+     with open(mesh_path, 'wb') as f:
+         mesh.write_ply(f)
+     image_frames = generate_video(mesh_path)
+     gif_path = os.path.join(f'{timestr}.gif')
+     image_frames[0].save(gif_path, save_all=True, optimizer=False, duration=5, append_images=image_frames[1:], loop=0)
+     return f'{timestr}.gif'
+
 
  def compose_clevr_objects(prompt, weights, steps):
      weights = [float(x.strip()) for x in weights.split('|')]
@@ -123,7 +413,7 @@ def stable_diffusion_compose(prompt, steps, weights, seed):
      return image
 
 
- def compose(prompt, weights, version, steps, seed):
+ def compose_2D_diffusion(prompt, weights, version, steps, seed):
      try:
          with th.no_grad():
              if version == 'Stable_Diffusion_1v_4':
@@ -132,35 +422,113 @@ def compose(prompt, weights, version, steps, seed):
              else:
                  return compose_clevr_objects(prompt, weights, steps)
      except Exception as e:
-         print(e)
          return None
 
+
  examples_1 = "A castle in a forest | grainy, fog"
  examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
  examples_5 = 'a white church | lightning in the background'
  examples_6 = 'mystical trees | A dark magical pond | dark'
  examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
- examples = [
-     [examples_6, "7.5 | 7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 8],
-     [examples_6, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 8],
-     [examples_1, "7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 0],
-     [examples_7, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 3],
-     [examples_5, "7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 0],
-     [examples_3, "7.5 | 7.5 | 7.5 | 7.5 | 7.5", 'CLEVR Objects', 100, 0]
+
+ image_examples = [
+     [examples_6, "7.5 | 7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 8],
+     [examples_6, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 8],
+     [examples_1, "7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 0],
+     [examples_7, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 3],
+     [examples_5, "7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 0],
+     [examples_3, "7.5 | 7.5 | 7.5 | 7.5 | 7.5", 'CLEVR Objects', 100, 0]
  ]
 
- title = 'Compositional Visual Generation with Composable Diffusion Models'
- description = '<p>Our conjunction and negation (a.k.a. negative prompts) operators are also added into stable diffusion webui! (<a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Negative-prompt">Negation</a> and <a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/commit/c26732fbee2a57e621ac22bf70decf7496daa4cd">Conjunction</a>)</p></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p>You can also specify the weight of each text by using `|` as the delimiter. When the weight is negative, it will use Negation Operator (NOT), which indicates the corresponding prompt is a negative prompt. Otherwise it will use Conjunction operator (AND).</p><p><b>Only Conjunction operator is enabled for CLEVR Object.</b></p><p><b>Note: When using Stable Diffusion, black images will be returned if the given prompt is detected as problematic. For composing GLIDE model, we recommend using the Colab demo in our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</b></p>'
-
- iface = gr.Interface(compose,
-                      inputs=[
-                          gr.Textbox(label='prompt', value='mystical trees | A dark magical pond | dark'),
-                          gr.Textbox(label='weights', value='7.5 | 7.5 | -7.5'),
-                          gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
-                          gr.Slider(10, 200, value=50),
-                          gr.Number(8)
-                      ],
-                      outputs='image', cache_examples=False,
-                      title=title, description=description, examples=examples)
-
- iface.launch()
+ pointe_examples = [["a cake | a house", "7.5 | 7.5", 'Point-E'],
+                    ["a green avocado | a chair", "7.5 | 3", 'Point-E'],
+                    ["a toilet | a chair", "7 | 5", 'Point-E']]
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """<h1 style="text-align: center;"><b>Composable Diffusion Models (ECCV
+         2022)</b> - <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion
+         -Models/">Project Page</a></h1>""")
+     gr.Markdown(
+         """<table style="display: inline-table; table-layout: fixed; width: 100%;">
+         <tr>
+         <td>
+         <figure>
+         <img src="https://media.giphy.com/media/gKfDjdXy0lbYNyROKo/giphy.gif" style="text-align:center; width:100%; display:block; margin:auto;">
+         <figcaption style="color: black; font-size: 15px; text-align: center;">"Mystical trees" <span style="color: red">AND</span> "A magical pond" <span style="color: red">AND</span> "Dark"</figcaption>
+         </figure>
+         </td>
+         <td>
+         <figure>
+         <img src="https://media.giphy.com/media/sf5m1Z5FldemLMatWn/giphy.gif" style="text-align:center; width:100%; display:block; margin:auto;">
+         <figcaption style="color: black; font-size: 15px; text-align: center;">"Mystical trees" <span style="color: red">AND</span> "A magical pond" <span style="color: red">AND NOT</span> "Dark"</figcaption>
+         </figure>
+         </td>
+         <td>
+         <figure>
+         <img src="https://media.giphy.com/media/lTzdW41bFnrD8AYa0K/giphy.gif" style="text-align:center; width:100%; display:block; margin:auto;">
+         <figcaption style="color: black; font-size: 15px; text-align: center;">"A toilet" <span style="color: red">AND</span> "A chair"</figcaption>
+         </figure>
+         </td>
+         <td>
+         <figure>
+         <img src="https://media.giphy.com/media/nFkMh70kzZCwjbRrx5/giphy.gif" style="text-align:center; width:100%; display:block; margin:auto;">
+         <figcaption style="color: black; font-size: 15px; text-align: center;">"A monitor" <span style="color: red">AND</span> "A brown couch"</figcaption>
+         </figure>
+         </td>
+         </tr>
+         </table>
+         """
+     )
+     gr.Markdown(
+         """<p style="font-size: 18px;">Compositional visual generation by composing pre-trained diffusion models
+         using compositional operators, <b>AND</b> and <b>NOT</b>.</p>""")
+     gr.Markdown(
+         """<p style="font-size: 18px;">When composing multiple inputs, please use <b>“|”</b> to separate them </p>""")
+     gr.Markdown(
+         """<p>( <b>Note</b>: For composing CLEVR objects, we recommend using <b><i>x</i></b> in range <b><i>[0.1,
+         0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in
+         given ranges.)</p><hr>""")
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown(
+                 """<h4>Composing natural language descriptions / objects for 2D image
+                 generation</h4>""")
+             with gr.Row():
+                 text_input = gr.Textbox(value="mystical trees | A dark magical pond | dark", label="Text to image prompt")
+                 weights_input = gr.Textbox(value="7.5 | 7.5 | 7.5", label="Weights")
+             with gr.Row():
+                 seed_input = gr.Number(0, label="Seed")
+                 steps_input = gr.Slider(10, 200, value=50, label="Steps")
+             with gr.Row():
+                 model_input = gr.Radio(
+                     ['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='Text to image model',
+                     value='Stable_Diffusion_1v_4')
+             image_output = gr.Image()
+             image_button = gr.Button("Generate")
+             img_examples = gr.Examples(
+                 examples=image_examples,
+                 inputs=[text_input, weights_input, model_input, steps_input, seed_input]
+             )
+
+         with gr.Column():
+             gr.Markdown(
+                 """<h4>Composing natural language descriptions for 3D asset generation</h4>""")
+             with gr.Row():
+                 asset_input = gr.Textbox(value="a cake | a house", label="Text to 3D prompt")
+             with gr.Row():
+                 asset_weights = gr.Textbox(value="7.5 | 7.5", label="Weights")
+             with gr.Row():
+                 asset_model = gr.Radio(['Point-E'], type="value", label='Text to 3D model', value='Point-E')
+             asset_output = gr.Image(label='GIF')
+             asset_button = gr.Button("Generate")
+             asset_examples = gr.Examples(examples=pointe_examples, inputs=[asset_input, asset_weights, asset_model])
+
+     image_button.click(compose_2D_diffusion,
+                        inputs=[text_input, weights_input, model_input, steps_input, seed_input],
+                        outputs=image_output)
+     asset_button.click(compose_pointe, inputs=[asset_input, asset_weights, asset_model], outputs=asset_output)
+
+ if __name__ == "__main__":
+     demo.queue(max_size=5)
+     demo.launch(debug=True)
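
The `|`-separated weights in both new entry points follow the composable-diffusion operators described on the project page: each prompt contributes its classifier-free guidance direction scaled by its weight, and a negative weight subtracts that direction (the NOT operator). The sketch below is background on how such a weighted combination is typically computed, not code from this commit; the tensor shapes and names are illustrative:

```python
import torch as th

def composed_eps(eps_uncond, eps_conds, weights):
    # eps_uncond: unconditional noise prediction
    # eps_conds:  one conditional prediction per '|'-separated prompt
    # weights:    parsed UI weights; positive => AND, negative => NOT
    out = eps_uncond.clone()
    for eps_c, w in zip(eps_conds, weights):
        out += w * (eps_c - eps_uncond)  # weighted guidance direction
    return out

# "7.5 | 7.5 | -7.5" composes two prompts AND-wise and negates the third.
e0 = th.zeros(1, 3, 64, 64)
preds = [th.randn_like(e0) for _ in range(3)]
eps = composed_eps(e0, preds, [7.5, 7.5, -7.5])
```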
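
Once the Space's dependencies are installed, the two new entry points can also be driven without the UI. A minimal sketch, assuming this file is importable as `app` (the signatures come from the diff above; the driver itself is illustrative):

```python
# Importing app runs the module-level model downloads and initialization.
import app

# 2D composition: prompts and per-prompt weights are '|'-separated;
# returns a PIL image, or None if generation fails.
image = app.compose_2D_diffusion(
    "a white church | lightning in the background",  # prompts
    "7.5 | 7.5",                                     # weights (negative => NOT)
    "Stable_Diffusion_1v_4",                         # or 'CLEVR Objects'
    50,                                              # steps
    0,                                               # seed
)

# 3D composition via Point-E: returns the filename of a turntable GIF.
gif_path = app.compose_pointe("a cake | a house", "7.5 | 7.5", "Point-E")
```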