Shuang59 committed on
Commit
4ab31f0
•
1 Parent(s): fbe6023

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -31
app.py CHANGED
@@ -14,9 +14,7 @@ import torch as th
14
  from composable_diffusion.download import download_model
15
  from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
16
  from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
17
-
18
- from torch import autocast
19
- from composable_stable_diffusion_pipeline import ComposableStableDiffusionPipeline
20
 
21
  # This notebook supports both CPU and GPU.
22
  # On CPU, generating one sample may take on the order of 20 minutes.
@@ -24,7 +22,6 @@ from composable_stable_diffusion_pipeline import ComposableStableDiffusionPipeli
24
 
25
  has_cuda = th.cuda.is_available()
26
  device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
27
- print(device)
28
 
29
  # init stable diffusion model
30
  pipe = ComposableStableDiffusionPipeline.from_pretrained(
@@ -32,9 +29,7 @@ pipe = ComposableStableDiffusionPipeline.from_pretrained(
32
  use_auth_token=st.secrets["USER_TOKEN"]
33
  ).to(device)
34
 
35
- def dummy(images, **kwargs):
36
- return images, False
37
- pipe.safety_checker = dummy
38
 
39
  # create model for CLEVR Objects
40
  clevr_options = model_and_diffusion_defaults_for_clevr()
@@ -68,9 +63,14 @@ clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
68
  print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
69
 
70
 
71
- def compose_clevr_objects(prompt, guidance_scale, steps):
72
- coordinates = [[float(x.split(',')[0].strip()), float(x.split(',')[1].strip())]
73
- for x in prompt.split('|')]
 
 
 
 
 
74
  coordinates += [[-1, -1]] # add unconditional score label
75
  batch_size = 1
76
 
@@ -83,9 +83,9 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
83
  model_out = clevr_model(combined, ts, **kwargs)
84
  eps, rest = model_out[:, :3], model_out[:, 3:]
85
  masks = kwargs.get('masks')
86
- cond_eps = eps[masks].mean(dim=0, keepdim=True)
87
- uncond_eps = eps[~masks].mean(dim=0, keepdim=True)
88
- half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
89
  eps = th.cat([half_eps] * x_t.size(0), dim=0)
90
  return th.cat([eps, rest], dim=1)
91
 
@@ -116,38 +116,38 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
116
  return out_img
117
 
118
 
119
- def stable_diffusion_compose(prompt, scale, steps, weights, seed):
120
  generator = th.Generator("cuda").manual_seed(int(seed))
121
- with autocast('cpu' if not th.cuda.is_available() else 'cuda'):
122
- image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps,
123
- weights=weights, generator=generator).images[0]
124
- image.save(f'{"_".join(prompt.split())}.png')
125
- return image
126
 
127
 
128
- def compose(prompt, weights, version, guidance_scale, steps, seed):
129
  try:
130
  with th.no_grad():
131
  if version == 'Stable_Diffusion_1v_4':
132
- return stable_diffusion_compose(prompt, guidance_scale, steps, weights, seed)
 
133
  else:
134
- return compose_clevr_objects(prompt, guidance_scale, steps)
135
  except Exception as e:
136
  print(e)
137
  return None
138
 
139
  examples_1 = "A castle in a forest | grainy, fog"
140
- examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
141
  examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
142
  examples_5 = 'a white church | lightning in the background'
143
  examples_6 = 'mystical trees | A dark magical pond | dark'
144
  examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
145
  examples = [
146
- [examples_1, "1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 0],
147
- [examples_7, "1 | 1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 3],
148
- [examples_5, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
149
- [examples_6, "1 | 1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 2],
150
- [examples_3, "1 | 1 | 1 | 1 | 1", 'CLEVR Objects', 10, 100, 0]
 
151
  ]
152
 
153
  title = 'Compositional Visual Generation with Composable Diffusion Models'
@@ -156,13 +156,12 @@ description = '<p>Our conjunction and negation (a.k.a. negative prompts) operato
156
  iface = gr.Interface(compose,
157
  inputs=[
158
  gr.Textbox(label='prompt', value='mystical trees | A dark magical pond | dark'),
159
- gr.Textbox(label='weights', value='1 | 1 | -1'),
160
  gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
161
- gr.Slider(2, 30, value=15),
162
  gr.Slider(10, 200, value=50),
163
  gr.Number(2)
164
  ],
165
  outputs='image', cache_examples=False,
166
  title=title, description=description, examples=examples)
167
 
168
- iface.launch()
 
14
  from composable_diffusion.download import download_model
15
  from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
16
  from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
17
+ from composable_diffusion.composable_stable_diffusion.pipeline_composable_stable_diffusion import ComposableStableDiffusionPipeline
 
 
18
 
19
  # This notebook supports both CPU and GPU.
20
  # On CPU, generating one sample may take on the order of 20 minutes.
 
22
 
23
  has_cuda = th.cuda.is_available()
24
  device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
 
25
 
26
  # init stable diffusion model
27
  pipe = ComposableStableDiffusionPipeline.from_pretrained(
 
29
  use_auth_token=st.secrets["USER_TOKEN"]
30
  ).to(device)
31
 
32
+ pipe.safety_checker = None
 
 
33
 
34
  # create model for CLEVR Objects
35
  clevr_options = model_and_diffusion_defaults_for_clevr()
 
63
  print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
64
 
65
 
66
+ def compose_clevr_objects(prompt, weights, steps):
67
+ weights = [float(x.strip()) for x in weights.split('|')]
68
+ weights = th.tensor(weights, device=device).reshape(-1, 1, 1, 1)
69
+ coordinates = [
70
+ [
71
+ float(x.split(',')[0].strip()), float(x.split(',')[1].strip())]
72
+ for x in prompt.split('|')
73
+ ]
74
  coordinates += [[-1, -1]] # add unconditional score label
75
  batch_size = 1
76
 
 
83
  model_out = clevr_model(combined, ts, **kwargs)
84
  eps, rest = model_out[:, :3], model_out[:, 3:]
85
  masks = kwargs.get('masks')
86
+ cond_eps = eps[masks]
87
+ uncond_eps = eps[~masks]
88
+ half_eps = uncond_eps + (weights * (cond_eps - uncond_eps)).sum(dim=0, keepdims=True)
89
  eps = th.cat([half_eps] * x_t.size(0), dim=0)
90
  return th.cat([eps, rest], dim=1)
91
 
 
116
  return out_img
117
 
118
 
119
+ def stable_diffusion_compose(prompt, steps, weights, seed):
120
  generator = th.Generator("cuda").manual_seed(int(seed))
121
+ image = pipe(prompt, guidance_scale=7.5, num_inference_steps=steps,
122
+ weights=weights, generator=generator).images[0]
123
+ image.save(f'{"_".join(prompt.split())}.png')
124
+ return image
 
125
 
126
 
127
+ def compose(prompt, weights, version, steps, seed):
128
  try:
129
  with th.no_grad():
130
  if version == 'Stable_Diffusion_1v_4':
131
+ res = stable_diffusion_compose(prompt, steps, weights, seed)
132
+ return res
133
  else:
134
+ return compose_clevr_objects(prompt, weights, steps)
135
  except Exception as e:
136
  print(e)
137
  return None
138
 
139
  examples_1 = "A castle in a forest | grainy, fog"
 
140
  examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
141
  examples_5 = 'a white church | lightning in the background'
142
  examples_6 = 'mystical trees | A dark magical pond | dark'
143
  examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
144
  examples = [
145
+ [examples_6, "7.5 | 7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 8],
146
+ [examples_6, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 8],
147
+ [examples_1, "7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 0],
148
+ [examples_7, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 3],
149
+ [examples_5, "7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 0],
150
+ [examples_3, "7.5 | 7.5 | 7.5 | 7.5 | 7.5", 'CLEVR Objects', 100, 0]
151
  ]
152
 
153
  title = 'Compositional Visual Generation with Composable Diffusion Models'
 
156
  iface = gr.Interface(compose,
157
  inputs=[
158
  gr.Textbox(label='prompt', value='mystical trees | A dark magical pond | dark'),
159
+ gr.Textbox(label='weights', value='7.5 | 7.5 | -7.5'),
160
  gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
 
161
  gr.Slider(10, 200, value=50),
162
  gr.Number(2)
163
  ],
164
  outputs='image', cache_examples=False,
165
  title=title, description=description, examples=examples)
166
 
167
+ iface.launch()