apolinario committed
Commit 0098e32
Parent(s): 8078b9d

Attempt to add vqgan and diffusion

Files changed (1): app.py (+29 -12)
app.py CHANGED
@@ -8,10 +8,10 @@ import shortuuid
 
 latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
 rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
-
-#print(rudalle)
-#guided = gr.Interface.load("spaces/EleutherAI/clip-guided-diffusion")
-#print(guided)
+diffusion = gr.Interface.load("spaces/multimodalart/diffusion")
+print(diffusion)
+vqgan = gr.Interface.load("spaces/multimodalart/vqgan")
+
 def text2image_latent(text,steps,width,height,images,diversity):
     results = latent(text, steps, width, height, images, diversity)
     image_paths = []
@@ -35,11 +35,14 @@ def text2image_rudalle(text,aspect,model):
     image = rudalle(text,aspect,model)[0]
     return(image)
 
-#def text2image_guided(text):
-#    image = guided(text, None, 10, 600, 0, 0, 0, random.randint(0,2147483647), None, 50, 32)
-#    print(image)
-#    image = image[0]
-#    return(image)
+def text2image_vqgan(text,width,height,style,steps,flavor):
+    results = vqgan(text,width,height,style,steps,flavor)
+    return(results)
+
+def text2image_diffusion(steps_diff, images_diff, weight, clip):
+    results = diffusion(steps_diff, images_diff, weight, clip)
+    print(results)
+    return(results)
 
 css_mt = {"margin-top": "1em"}
 
@@ -53,21 +56,33 @@ with gr.Blocks() as mindseye:
     with gr.Row():
         with gr.Tabs():
             with gr.TabItem("Latent Diffusion"):
+                gr.Markdown("Latent Diffusion is the state of the art among open-source text-to-image models and is superb at text synthesis. It sometimes struggles with complex prompts.")
                 steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1)
                 width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
                 height = gr.inputs.Slider(label="Height", default=256, step=32, maximum=256, minimum=32)
                 images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4)
                 diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0)
                 get_image_latent = gr.Button("Generate Image",css=css_mt)
-
             with gr.TabItem("ruDALLE"):
+                gr.Markdown("ruDALLE is a replication of DALL-E 1 in Russian. No worries, your prompts will be translated to Russian automatically. If you see an error, try again a few times.")
                 aspect = gr.inputs.Radio(label="Aspect Ratio", choices=["Square", "Horizontal", "Vertical"],default="Square")
                 model = gr.inputs.Dropdown(label="Model", choices=["Surrealism","Realism", "Emoji"], default="Surrealism")
                 get_image_rudalle = gr.Button("Generate Image",css=css_mt)
             with gr.TabItem("VQGAN+CLIP"):
-                pass
-            with gr.TabItem("V-Diffusion"):
-                pass
+                gr.Markdown("VQGAN+CLIP is the most famous text-to-image generator and can produce good artistic results.")
+                width_vq = gr.inputs.Slider(label="Width", default=256, minimum=32, step=32, maximum=512)
+                height_vq = gr.inputs.Slider(label="Height", default=256, minimum=32, step=32, maximum=512)
+                style = gr.inputs.Dropdown(label="Style - Hyper Fast Results is fast but compromises a bit of the quality",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results")
+                steps_vq = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",default=50,maximum=300,minimum=1,step=1)
+                flavor = gr.inputs.Dropdown(label="Flavor - pick a flavor for the style of the images, based on the images below",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu"])
+                get_image_vqgan = gr.Button("Generate Image",css=css_mt)
+            with gr.TabItem("Guided Diffusion"):
+                gr.Markdown("Guided Diffusion models produce superb-quality results. V-Diffusion is its latest implementation.")
+                steps_diff = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=40,maximum=80,minimum=1,step=1)
+                images_diff = gr.inputs.Slider(label="Number of images in parallel", default=2, maximum=4, minimum=1, step=1)
+                weight = gr.inputs.Slider(label="Weight - how closely the image should resemble the prompt", default=5, maximum=15, minimum=0, step=1)
+                clip = gr.inputs.Checkbox(label="CLIP Guided - improves coherence with complex prompts, but makes generation slower")
+                get_image_diffusion = gr.Button("Generate Image",css=css_mt)
     with gr.Row():
         with gr.Tabs():
             with gr.TabItem("Image output"):
@@ -77,4 +92,6 @@ with gr.Blocks() as mindseye:
 
     get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=[image,gallery])
     get_image_rudalle.click(text2image_rudalle, inputs=[text,aspect,model], outputs=image)
+    get_image_vqgan.click(text2image_vqgan, inputs=[text,width_vq,height_vq,style,steps_vq,flavor], outputs=image)
+    get_image_diffusion.click(text2image_diffusion, inputs=[steps_diff, images_diff, weight, clip], outputs=gallery)
 mindseye.launch()
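
Note on the pattern this commit extends: gr.Interface.load("spaces/...") returns a callable interface, and calling it with positional arguments that mirror the source Space's declared inputs runs the remote model. Below is a minimal standalone sketch using the latentdiffusion Space already loaded in app.py; the prompt and argument values are illustrative only, with the argument order taken from text2image_latent above.

import gradio as gr

# Load a hosted Space as a callable, the same pattern app.py uses
# for latent, rudalle, diffusion, and vqgan.
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")

# Positional arguments mirror the Space's inputs:
# text, steps, width, height, number of images, diversity scale.
results = latent("a watercolor painting of a lighthouse", 45, 256, 256, 2, 5.0)
print(results)

In app.py this call happens inside text2image_latent, and get_image_latent.click wires the UI widgets to those same positional slots.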