File size: 8,491 Bytes
2eb58d1 199e379 84f6f2e 2eb58d1 0098e32 84f6f2e a17b7c9 84f6f2e e9f0715 744a1fc 199e379 2eb58d1 4e01792 744a1fc 2eb58d1 0098e32 744a1fc 0098e32 3ea6729 744a1fc d303ce7 744a1fc 2eb58d1 84f6f2e 2eb58d1 fd00b5e 63f5381 09e1d9b fd00b5e 523a420 63f5381 c0ceaaf fd00b5e 63f5381 fd00b5e 63f5381 0098e32 ae24367 0098e32 7e5566d 0098e32 63f5381 0098e32 7e5566d fd00b5e 744a1fc fd00b5e 16489cb 744a1fc ae24367 3ea6729 fd00b5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import random
import os
import io, base64
from PIL import Image
import numpy
import shortuuid
# Load the four hosted text-to-image Spaces as callable Gradio interfaces.
# Each handle is invoked by the matching text2image_* wrapper defined below.
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
diffusion = gr.Interface.load("spaces/multimodalart/diffusion")
# Debug output: prints the loaded diffusion interface object at import time.
print(diffusion)
vqgan = gr.Interface.load("spaces/multimodalart/vqgan")
def text2image_latent(text, steps, width, height, images, diversity):
    """Generate images with the Latent Diffusion Space and save them as PNGs.

    All arguments are forwarded unchanged, in order, to the module-level
    ``latent`` interface. Every base64-encoded PNG found in the second
    element of its result is decoded and written to ``./tmp`` under a
    freshly generated ``shortuuid`` file name.

    Args:
        text: Prompt forwarded to the Space.
        steps: Forwarded to the Space.
        width: Forwarded to the Space.
        height: Forwarded to the Space.
        images: Forwarded to the Space.
        diversity: Forwarded to the Space.

    Returns:
        A list of file paths (``str``), one per saved image, in the order
        the images appear in the Space's result.
    """
    results = latent(text, steps, width, height, images, diversity)

    temp_dir = './tmp'
    # Create the output directory once, before the loop. ``exist_ok=True``
    # already tolerates an existing directory, so no separate (race-prone)
    # existence check is needed.
    os.makedirs(temp_dir, exist_ok=True)

    image_paths = []
    # Each entry of results[1] is indexable; element 0 holds the image as a
    # "data:image/png;base64,..." string.
    for image in results[1]:
        image_str = image[0].replace("data:image/png;base64,", "")
        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        # Build the destination path once and reuse it for save + return.
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths
def text2image_rudalle(text, aspect, model):
    """Run the ruDALLE Space and return its first output as a one-item list.

    The arguments are passed straight through to the module-level
    ``rudalle`` interface; only element 0 of what it returns is kept.
    """
    outputs = rudalle(text, aspect, model)
    first_output = outputs[0]
    # Wrapped in a list because the click handler feeds a gallery component.
    return [first_output]
def text2image_vqgan(text, width, height, style, steps, flavor):
    """Run the VQGAN+CLIP Space and return its result as a one-item list.

    The arguments are passed straight through to the module-level ``vqgan``
    interface; whatever it returns becomes the single list element.
    """
    generated = vqgan(text, width, height, style, steps, flavor)
    # Wrapped in a list because the click handler feeds a gallery component.
    gallery_items = [generated]
    return gallery_items
def text2image_diffusion(text, steps_diff, images_diff, weight, clip):
    """Generate images with the Guided Diffusion Space and save them as PNGs.

    All arguments are forwarded unchanged, in order, to the module-level
    ``diffusion`` interface. Every base64-encoded PNG in its result is
    decoded and written to ``./tmp`` under a freshly generated ``shortuuid``
    file name.

    Args:
        text: Prompt forwarded to the Space.
        steps_diff: Forwarded to the Space.
        images_diff: Forwarded to the Space.
        weight: Forwarded to the Space.
        clip: Forwarded to the Space.

    Returns:
        A list of file paths (``str``), one per saved image, in the order
        the images appear in the Space's result.
    """
    results = diffusion(text, steps_diff, images_diff, weight, clip)

    temp_dir = './tmp'
    # Create the output directory once, before the loop. ``exist_ok=True``
    # already tolerates an existing directory, so no separate (race-prone)
    # existence check is needed.
    os.makedirs(temp_dir, exist_ok=True)

    image_paths = []
    # Each entry of the result is itself a "data:image/png;base64,..." string.
    for image in results:
        image_str = image.replace("data:image/png;base64,", "")
        decoded_bytes = base64.decodebytes(bytes(image_str, "utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        # Build the destination path once and reuse it for save + return.
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths
# Style dict handed to every "Generate Image" button below.
css_mt = {"margin-top": "1em"}
# Created outside the Blocks context; not referenced again in this section.
empty = gr.outputs.HTML()

# NOTE(review): the nesting below was reconstructed from the `with` statements
# because the indentation was lost — confirm against the deployed layout,
# in particular whether the output Row belongs inside the Column.
with gr.Blocks() as mindseye:
    # Page title and introduction.
    gr.Markdown(
        "<h1>MindsEye Lite <small><small>run multiple text-to-image models in one place</small></small></h1><p>MindsEye Lite orchestrates multiple text-to-image models in one Spaces. This work carries the spirit of <a href='https://multimodal.art/mindseye' target='_blank'>MindsEye Beta</a>, but with simplified versions of the models due to current hardware limitations of Spaces. MindsEye Lite was created by <a style='color: rgb(99, 102, 241);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a>, keep up with the <a style='color: rgb(99, 102, 241);' href='https://multimodal.art/news' target='_blank'>latest multimodal ai art news here</a> and consider <a style='color: rgb(99, 102, 241);' href='https://www.patreon.com/multimodalart' target='_blank'>supporting us on Patreon</a></div></p>"
    )
    # Inline stylesheet injected through a Markdown component.
    gr.Markdown(
        "<style>h1{margin-bottom:1em} .mx-auto.container .gr-form-gap {flex-direction: row; gap: calc(1rem * calc(1 - var(--tw-space-y-reverse)));} .mx-auto.container .gr-form-gap .flex-col, .mx-auto.container .gr-form-gap .gr-box{width: 100%} .svelte-1bwm9qh > .grid {grid-template-columns: repeat(3,minmax(0,1fr));} @media only screen and (max-width: 1000px){.mx-auto.container .gr-form-gap{flex-direction: column}}</style>"
    )
    # Prompt box; it is the first input of all four click handlers.
    text = gr.inputs.Textbox(
        placeholder="Type your prompt to generate an image",
        label="Prompt - try adding increments to your prompt such as 'a painting of', 'in the style of Picasso'",
        default="A giant mecha robot in Rio de Janeiro, oil on canvas",
    )

    with gr.Column():
        # Input side: one tab of controls per model.
        with gr.Row():
            with gr.Tabs():
                with gr.TabItem("Latent Diffusion"):
                    gr.Markdown(
                        "<a href='https://huggingface.co/spaces/multimodalart/latentdiffusion' target='_blank'>Latent Diffusion</a> is the state of the art of open source text-to-image models, superb in text synthesis. Sometimes struggles with complex prompts"
                    )
                    steps = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate",
                        default=45,
                        maximum=50,
                        minimum=1,
                        step=1,
                    )
                    width = gr.inputs.Slider(
                        label="Width",
                        default=256,
                        step=32,
                        maximum=256,
                        minimum=32,
                    )
                    height = gr.inputs.Slider(
                        label="Height",
                        default=256,
                        step=32,
                        maximum=256,
                        minimum=32,
                    )
                    images = gr.inputs.Slider(
                        label="Images - How many images you wish to generate",
                        default=2,
                        step=1,
                        minimum=1,
                        maximum=4,
                    )
                    diversity = gr.inputs.Slider(
                        label="Diversity scale - How different from one another you wish the images to be",
                        default=5.0,
                        minimum=1.0,
                        maximum=15.0,
                    )
                    get_image_latent = gr.Button("Generate Image", css=css_mt)

                with gr.TabItem("ruDALLE"):
                    gr.Markdown(
                        "<a href='https://huggingface.co/spaces/multimodalart/rudalle' target='_blank'>ruDALLE</a> is a replication of DALL-E 1 in the russian language. No worries, your prompts will be translated automatically to russian. In case you see an error, try again a few times"
                    )
                    aspect = gr.inputs.Radio(
                        label="Aspect Ratio",
                        choices=["Square", "Horizontal", "Vertical"],
                        default="Square",
                    )
                    model = gr.inputs.Dropdown(
                        label="Model",
                        choices=["Surrealism", "Realism", "Emoji"],
                        default="Surrealism",
                    )
                    get_image_rudalle = gr.Button("Generate Image", css=css_mt)

                with gr.TabItem("VQGAN+CLIP"):
                    gr.Markdown(
                        "<a href='https://huggingface.co/spaces/multimodalart/vqgan' target='_blank'>VQGAN+CLIP</a> is the most famous text-to-image generator. Can produce good artistic results"
                    )
                    width_vq = gr.inputs.Slider(
                        label="Width",
                        default=256,
                        minimum=32,
                        step=32,
                        maximum=512,
                    )
                    height_vq = gr.inputs.Slider(
                        label="Height",
                        default=256,
                        minimum=32,
                        step=32,
                        maximum=512,
                    )
                    style = gr.inputs.Dropdown(
                        label="Style - Hyper Fast Results is fast but compromises a bit of the quality",
                        choices=[
                            "Default",
                            "Balanced",
                            "Detailed",
                            "Consistent Creativity",
                            "Realistic",
                            "Smooth",
                            "Subtle MSE",
                            "Hyper Fast Results",
                        ],
                        default="Hyper Fast Results",
                    )
                    steps_vq = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",
                        default=50,
                        maximum=300,
                        minimum=1,
                        step=1,
                    )
                    flavor = gr.inputs.Dropdown(
                        label="Flavor - pick a flavor for the style of the images, based on the images below",
                        choices=[
                            "ginger",
                            "cumin",
                            "holywater",
                            "zynth",
                            "wyvern",
                            "aaron",
                            "moth",
                            "juu",
                        ],
                    )
                    get_image_vqgan = gr.Button("Generate Image", css=css_mt)

                with gr.TabItem("Guided Diffusion"):
                    gr.Markdown(
                        "<a href='https://huggingface.co/spaces/multimodalart/diffusion' target='_blank'>Guided Diffusion</a> models produce superb quality results. V-Diffusion is its latest implementation"
                    )
                    steps_diff = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate",
                        default=40,
                        maximum=80,
                        minimum=1,
                        step=1,
                    )
                    images_diff = gr.inputs.Slider(
                        label="Number of images in parallel",
                        default=2,
                        maximum=4,
                        minimum=1,
                        step=1,
                    )
                    weight = gr.inputs.Slider(
                        label="Weight - how closely the image should resemble the prompt",
                        default=5,
                        maximum=15,
                        minimum=0,
                        step=1,
                    )
                    clip = gr.inputs.Checkbox(
                        label="CLIP Guided - improves coherence with complex prompts, makes it slower"
                    )
                    get_image_diffusion = gr.Button("Generate Image", css=css_mt)

        # Output side: a single gallery shared by all four generators.
        with gr.Row():
            with gr.Tabs():
                # A single-image output tab was left disabled by the author:
                #with gr.TabItem("Image output"):
                #    image = gr.outputs.Image()
                with gr.TabItem("Gallery output"):
                    gallery = gr.Gallery(label="Individual images")

    # Wire each tab's button to its wrapper; every handler writes to `gallery`.
    get_image_latent.click(
        text2image_latent,
        inputs=[text, steps, width, height, images, diversity],
        outputs=gallery,
    )
    get_image_rudalle.click(
        text2image_rudalle,
        inputs=[text, aspect, model],
        outputs=gallery,
    )
    get_image_vqgan.click(
        text2image_vqgan,
        inputs=[text, width_vq, height_vq, style, steps_vq, flavor],
        outputs=gallery,
    )
    get_image_diffusion.click(
        text2image_diffusion,
        inputs=[text, steps_diff, images_diff, weight, clip],
        outputs=gallery,
    )

mindseye.launch()