Shuang59 committed on
Commit
e6ffe8d
β€’
1 Parent(s): 471b0b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -16
app.py CHANGED
@@ -29,9 +29,12 @@ print(device)
29
  # init stable diffusion model
30
  pipe = ComposableStableDiffusionPipeline.from_pretrained(
31
  "CompVis/stable-diffusion-v1-4",
32
- use_auth_token=st.secrets["USER_TOKEN"]
33
  ).to(device)
34
 
 
 
 
35
 
36
  # create model for CLEVR Objects
37
  clevr_options = model_and_diffusion_defaults_for_clevr()
@@ -113,48 +116,54 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
113
  return out_img
114
 
115
 
116
- def stable_diffusion_compose(prompt, scale, steps):
 
117
  with autocast('cpu' if not th.cuda.is_available() else 'cuda'):
118
- image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps)["sample"][0]
 
 
119
  return image
120
 
121
 
122
- def compose(prompt, version, guidance_scale, steps):
123
  try:
124
  with th.no_grad():
125
  if version == 'Stable_Diffusion_1v_4':
126
- return stable_diffusion_compose(prompt, guidance_scale, steps)
127
  else:
128
  return compose_clevr_objects(prompt, guidance_scale, steps)
129
  except Exception as e:
130
  print(e)
131
  return None
132
 
133
-
134
- examples_1 = 'a camel | a forest'
135
  examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
136
  examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
137
- examples_4 = 'a blue house | a desert'
138
  examples_5 = 'a white church | lightning in the background'
139
  examples_6 = 'a camel | arctic'
140
  examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
141
  examples = [
142
- [examples_7, 'Stable_Diffusion_1v_4', 15, 50],
143
- [examples_5, 'Stable_Diffusion_1v_4', 15, 50],
144
- [examples_4, 'Stable_Diffusion_1v_4', 15, 50],
145
- [examples_6, 'Stable_Diffusion_1v_4', 15, 50],
146
- [examples_3, 'CLEVR Objects', 10, 100]
 
 
147
  ]
148
 
149
  title = 'Compositional Visual Generation with Composable Diffusion Models'
150
- description = '<p>Demo for Composable Diffusion<ul><li>~30s per Stable-Diffusion example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is varied depending on what gpu is used.)</ul></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p><b>Note: When using Stable Diffusion, black images will be returned if the given prompt is detected as problematic. For composing GLIDE model, we recommend using the Colab demo in our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</b></p>'
151
 
152
  iface = gr.Interface(compose,
153
  inputs=[
154
- gr.Textbox(label='prompt', value='a white church | lightning in the background'),
 
155
  gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
156
  gr.Slider(2, 30, value=15),
157
- gr.Slider(10, 200, value=50)
 
158
  ],
159
  outputs='image', cache_examples=False,
160
  title=title, description=description, examples=examples)
 
29
  # init stable diffusion model
30
  pipe = ComposableStableDiffusionPipeline.from_pretrained(
31
  "CompVis/stable-diffusion-v1-4",
32
+ use_auth_token=st.secrets["USER_TOKEN"]  # SECURITY: hardcoded access token redacted — never commit credentials; the leaked token must be revoked and loaded from secrets instead
33
  ).to(device)
34
 
35
+ def dummy(images, **kwargs):
36
+ return images, False
37
+ pipe.safety_checker = dummy
38
 
39
  # create model for CLEVR Objects
40
  clevr_options = model_and_diffusion_defaults_for_clevr()
 
116
  return out_img
117
 
118
 
119
+ def stable_diffusion_compose(prompt, scale, steps, weights, seed):
120
+ generator = th.Generator('cpu' if not th.cuda.is_available() else 'cuda').manual_seed(int(seed))  # match the device selection used by autocast below; Generator("cuda") raises on CPU-only hosts
121
  with autocast('cpu' if not th.cuda.is_available() else 'cuda'):
122
+ image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps,
123
+ weights=weights, generator=generator)["sample"][0]
124
+ image.save(f'{"_".join(prompt.split())}.png')
125
  return image
126
 
127
 
128
+ def compose(prompt, weights, version, guidance_scale, steps, seed):
129
  try:
130
  with th.no_grad():
131
  if version == 'Stable_Diffusion_1v_4':
132
+ return stable_diffusion_compose(prompt, guidance_scale, steps, weights, seed)
133
  else:
134
  return compose_clevr_objects(prompt, guidance_scale, steps)
135
  except Exception as e:
136
  print(e)
137
  return None
138
 
139
+ examples_1 = "A castle in a forest | grainy, fog"
 
140
  examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
141
  examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
142
+ examples_4 = 'a photo of Obama | a photo of Biden'
143
  examples_5 = 'a white church | lightning in the background'
144
  examples_6 = 'a camel | arctic'
145
  examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
146
  examples = [
147
+ [examples_1, "1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 0],
148
+ [examples_4, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
149
+ [examples_7, "1 | 1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
150
+ [examples_5, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
151
+ [examples_6, "1 | 1", 'Stable_Diffusion_1v_4', 15, 50, 0],
152
+ [examples_6, "1 | -1", 'Stable_Diffusion_1v_4', 15, 50, 0],
153
+ [examples_3, "1 | 1 | 1 | 1 | 1", 'CLEVR Objects', 10, 100, 0]
154
  ]
155
 
156
  title = 'Compositional Visual Generation with Composable Diffusion Models'
157
+ description = '<p>Our conjunction and negation operators are also added into stable diffusion webui! (<a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Negative-prompt">Negation</a> and <a href="https://github.com/AUTOMATIC1111/stable-diffusion-webui/commit/c26732fbee2a57e621ac22bf70decf7496daa4cd">Conjunction</a>)</p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p>You can also specify the weight of each text by using `|` as the delimiter. When the weight is negative, it will use Negation Operator (NOT). Otherwise it will use Conjunction operator (AND).</p><p><b>Only Conjunction operator is enabled for CLEVR Object.</b></p><p><b>Note: When using Stable Diffusion, black images will be returned if the given prompt is detected as problematic. For composing GLIDE model, we recommend using the Colab demo in our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</b></p>'
158
 
159
  iface = gr.Interface(compose,
160
  inputs=[
161
+ gr.Textbox(label='prompt', value='a photo of Obama | a photo of Biden'),
162
+ gr.Textbox(label='weights', value='1 | 1'),
163
  gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
164
  gr.Slider(2, 30, value=15),
165
+ gr.Slider(10, 200, value=50),
166
+ gr.Number(0)
167
  ],
168
  outputs='image', cache_examples=False,
169
  title=title, description=description, examples=examples)