Shuang59 committed on
Commit
e9116d0
β€’
1 Parent(s): ff2dfb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -3
app.py CHANGED
@@ -24,7 +24,7 @@ from composable_diffusion.model_creation import create_model_and_diffusion as cr
24
  from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
25
 
26
 
27
- from PIL import Image
28
 
29
  from torch import autocast
30
  from diffusers import StableDiffusionPipeline
@@ -316,7 +316,20 @@ def compose(prompt, version, guidance_scale, steps):
316
  model.to(cpu)
317
  model_up.to(cpu)
318
  clevr_model.to(device)
319
- return compose_clevr_objects(prompt, guidance_scale, steps)
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
 
322
  examples_1 = 'a camel | a forest'
@@ -339,7 +352,7 @@ examples = [
339
  import gradio as gr
340
 
341
  title = 'Compositional Visual Generation with Composable Diffusion Models'
342
- description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE/Stable-Diffusion example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is varied depending on what gpu is used.)</ul></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p><b>Note</b>: When using more steps, the results can improve.</p>'
343
 
344
  iface = gr.Interface(compose,
345
  inputs=[
 
24
  from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
25
 
26
 
27
+ from PIL import Image, ImageDraw, ImageFont
28
 
29
  from torch import autocast
30
  from diffusers import StableDiffusionPipeline
 
316
  model.to(cpu)
317
  model_up.to(cpu)
318
  clevr_model.to(device)
319
+ # simple check
320
+ is_text = True
321
+ for char in prompt:
322
+ if char.isdigit():
323
+ is_text = False
324
+ break
325
+ if is_text:
326
+ img = Image.new('RGB', (512, 512), color=(255, 255, 255))
327
+ d = ImageDraw.Draw(img)
328
+ font = ImageFont.load_default()
329
+ d.text((0, 256), "input should be similar to the example using 2D coordinates.", fill=(0, 0, 0), font=font)
330
+ return img
331
+ else:
332
+ return compose_clevr_objects(prompt, guidance_scale, steps)
333
 
334
 
335
  examples_1 = 'a camel | a forest'
 
352
  import gradio as gr
353
 
354
  title = 'Compositional Visual Generation with Composable Diffusion Models'
355
+ description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE/Stable-Diffusion example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is varied depending on what gpu is used.)</ul></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p><p><b>Note</b>: When using Stable Diffusion, black images will be returned if the given prompt is detected as problematic.</p>'
356
 
357
  iface = gr.Interface(compose,
358
  inputs=[