Spaces:
Runtime error
Runtime error
Commit
•
37f8606
1
Parent(s):
b513fe0
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import random
|
3 |
+
import os
|
4 |
+
import io, base64
|
5 |
+
from PIL import Image
|
6 |
+
import numpy
|
7 |
+
import shortuuid
|
8 |
+
|
9 |
+
# Remote Gradio Spaces loaded as callable interfaces. Each call below proxies
# a prediction request to the corresponding hosted model Space, so module
# import performs network requests to resolve these Spaces.
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
diffusion = gr.Interface.load("spaces/multimodalart/diffusion")
vqgan = gr.Interface.load("spaces/multimodalart/vqgan")
|
13 |
+
|
14 |
+
def text2image_latent(text, steps, width, height, images, diversity):
    """Generate images with the Latent Diffusion Space and save them locally.

    Args:
        text: prompt string forwarded to the remote Space.
        steps: number of diffusion steps.
        width, height: output image dimensions in pixels.
        images: how many images to request.
        diversity: diversity scale between the generated images.

    Returns:
        A list of local PNG file paths, suitable for a ``gr.Gallery`` output.
    """
    results = latent(text, steps, width, height, images, diversity)
    temp_dir = './tmp'
    # makedirs with exist_ok is idempotent — no need for a per-image
    # os.path.exists() check inside the loop as before.
    os.makedirs(temp_dir, exist_ok=True)
    image_paths = []
    # results[1] is a list of entries whose first element is a
    # data-URL-prefixed base64 PNG string produced by the remote Space.
    for image in results[1]:
        image_str = image[0].replace("data:image/png;base64,", "")
        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        # Reuse image_path instead of rebuilding the same f-string for save().
        img.save(image_path)
        image_paths.append(image_path)
    # Dropped the unused image_arrays accumulator from the original.
    return image_paths
|
31 |
+
|
32 |
+
def text2image_rudalle(text, aspect, model):
    """Run one ruDALLE generation and return its first image in a list.

    The gallery output expects a list, so the single returned image is
    wrapped before being handed back to Gradio.
    """
    generated = rudalle(text, aspect, model)
    first_image = generated[0]
    return [first_image]
|
35 |
+
|
36 |
+
def text2image_vqgan(text, width, height, style, steps, flavor):
    """Proxy a VQGAN+CLIP generation and wrap its result for gallery output."""
    generated = vqgan(text, width, height, style, steps, flavor)
    return [generated]
|
39 |
+
|
40 |
+
def text2image_diffusion(text, steps_diff, images_diff, weight, clip):
    """Generate images with the Guided Diffusion Space and save them locally.

    Args:
        text: prompt string forwarded to the remote Space.
        steps_diff: number of diffusion steps.
        images_diff: number of images generated in parallel.
        weight: how closely the image should resemble the prompt.
        clip: whether to enable CLIP guidance.

    Returns:
        A list of local PNG file paths, suitable for a ``gr.Gallery`` output.
    """
    results = diffusion(text, steps_diff, images_diff, weight, clip)
    temp_dir = './tmp'
    # makedirs with exist_ok is idempotent — replaces the per-iteration
    # os.path.exists() check of the original.
    os.makedirs(temp_dir, exist_ok=True)
    image_paths = []
    # Each entry is a data-URL-prefixed base64 PNG string from the remote
    # Space. Leftover debug prints from the original were removed.
    for image_str in results:
        image_str = image_str.replace("data:image/png;base64,", "")
        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        # Reuse image_path instead of rebuilding the same f-string for save().
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths
|
58 |
+
|
59 |
+
def text2image_dallemini(text):
    """Placeholder for a DALL-E mini backend; not implemented yet."""
    return None
|
61 |
+
|
62 |
+
# Shared CSS snippet applied to every "Generate Image" button.
css_mt = {"margin-top": "1em"}

# NOTE(review): `empty` is created here but never referenced below — presumably
# a leftover output placeholder; confirm before removing.
empty = gr.outputs.HTML()

# Top-level UI: one shared prompt textbox, a left column with one tab per
# model (each with its own parameter widgets and button), a right column with
# the shared gallery output, and click handlers wiring each button to its
# model-specific generation function above.
with gr.Blocks() as mindseye:
    gr.Markdown("<h1>MindsEye Lite <small><small>run multiple text-to-image models in one place</small></small></h1><p>MindsEye Lite orchestrates multiple text-to-image models in one Spaces. This work carries the spirit of <a href='https://multimodal.art/mindseye' target='_blank'>MindsEye Beta</a>, but with simplified versions of the models due to current hardware limitations of Spaces. MindsEye Lite was created by <a style='color: rgb(99, 102, 241);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a>, keep up with the <a style='color: rgb(99, 102, 241);' href='https://multimodal.art/news' target='_blank'>latest multimodal ai art news here</a>, join our <a href='https://discord.gg/FsDBTE5BNx'>Discord</a> and consider <a style='color: rgb(99, 102, 241);' href='https://www.patreon.com/multimodalart' target='_blank'>supporting us on Patreon</a></div></p>")
    # Inline CSS tweaks injected via a Markdown element.
    gr.Markdown("<style>h1{margin-bottom:0em !important} .svelte-9r19iu > .grid {grid-template-columns: repeat(3,minmax(0,1fr));} </style>")
    # Single prompt shared by all four model tabs.
    text = gr.inputs.Textbox(placeholder="Type your prompt to generate an image", label="Prompt - try adding increments to your prompt such as 'a painting of', 'in the style of Picasso'", default="A giant mecha robot in Rio de Janeiro, oil on canvas")
    with gr.Row():
        with gr.Column():
            with gr.Tabs():
                # --- Latent Diffusion tab ---
                with gr.TabItem("Latent Diffusion"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/latentdiffusion' target='_blank'>Latent Diffusion</a> is the state of the art of open source text-to-image models, superb in text synthesis. Sometimes struggles with complex prompts")
                    steps = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=45,maximum=50,minimum=1,step=1)
                    width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
                    height = gr.inputs.Slider(label="Height", default=256, step=32, maximum = 256, minimum=32)
                    images = gr.inputs.Slider(label="Images - How many images you wish to generate", default=2, step=1, minimum=1, maximum=4)
                    diversity = gr.inputs.Slider(label="Diversity scale - How different from one another you wish the images to be",default=5.0, minimum=1.0, maximum=15.0)
                    get_image_latent = gr.Button("Generate Image",css=css_mt)
                # --- VQGAN+CLIP tab ---
                with gr.TabItem("VQGAN+CLIP"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/vqgan' target='_blank'>VQGAN+CLIP</a> is the most famous text-to-image generator. Can produce good artistic results")
                    width_vq = gr.inputs.Slider(label="Width", default=256, minimum=32, step=32, maximum=512)
                    height_vq= gr.inputs.Slider(label="Height", default=256, minimum=32, step=32, maximum=512)
                    style = gr.inputs.Dropdown(label="Style - Hyper Fast Results is fast but compromises a bit of the quality",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results")
                    steps_vq = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",default=50,maximum=300,minimum=1,step=1)
                    flavor = gr.inputs.Dropdown(label="Flavor - pick a flavor for the style of the images, based on the images below",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu"])
                    get_image_vqgan = gr.Button("Generate Image",css=css_mt)
                # --- Guided Diffusion tab ---
                with gr.TabItem("Guided Diffusion"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/diffusion' target='_blank'>Guided Diffusion</a> models produce superb quality results. V-Diffusion is its latest implementation")
                    steps_diff = gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=40,maximum=80,minimum=1,step=1)
                    images_diff = gr.inputs.Slider(label="Number of images in parallel", default=2, maximum=4, minimum=1, step=1)
                    weight = gr.inputs.Slider(label="Weight - how closely the image should resemble the prompt", default=5, maximum=15, minimum=0, step=1)
                    clip = gr.inputs.Checkbox(label="CLIP Guided - improves coherence with complex prompts, makes it slower")
                    get_image_diffusion = gr.Button("Generate Image",css=css_mt)
                # --- ruDALLE tab ---
                with gr.TabItem("ruDALLE"):
                    gr.Markdown("<a href='https://huggingface.co/spaces/multimodalart/rudalle' target='_blank'>ruDALLE</a> is a replication of DALL-E 1 in the russian language. No worries, your prompts will be translated automatically to russian. In case you see an error, try again a few times")
                    aspect = gr.inputs.Radio(label="Aspect Ratio", choices=["Square", "Horizontal", "Vertical"],default="Square")
                    model = gr.inputs.Dropdown(label="Model", choices=["Surrealism","Realism", "Emoji"], default="Surrealism")
                    get_image_rudalle = gr.Button("Generate Image",css=css_mt)
        with gr.Column():
            with gr.Tabs():
                #with gr.TabItem("Image output"):
                #    image = gr.outputs.Image()
                # Shared output: every model's click handler writes into this gallery.
                with gr.TabItem("Gallery output"):
                    gallery = gr.Gallery(label="Individual images")
    with gr.Row():
        gr.Markdown("<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on both the Imagenet dataset and in an undisclosed dataset by OpenAI.</div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>")
    # Wire each tab's button to its backend function; all share the prompt
    # textbox as first input and the gallery as output.
    get_image_latent.click(text2image_latent, inputs=[text,steps,width,height,images,diversity], outputs=gallery)
    get_image_rudalle.click(text2image_rudalle, inputs=[text,aspect,model], outputs=gallery)
    get_image_vqgan.click(text2image_vqgan, inputs=[text,width_vq,height_vq,style,steps_vq,flavor],outputs=gallery)
    get_image_diffusion.click(text2image_diffusion, inputs=[text, steps_diff, images_diff, weight, clip],outputs=gallery)
mindseye.launch(enable_queue=False)
|