"""MindsEye Lite: one Gradio UI in front of several hosted text-to-image Spaces.

The script loads four remote Spaces as callable interfaces, wraps each one in a
small adapter that returns a list of images for the shared gallery, and builds
a tabbed Blocks UI with one tab of controls per model.
"""
import base64
import io
import os
import random

import gradio as gr
import numpy
import shortuuid
from PIL import Image

# Remote Spaces loaded as callables; each call forwards its arguments to the
# hosted model and returns that Space's outputs.
latent = gr.Interface.load("spaces/multimodalart/latentdiffusion")
rudalle = gr.Interface.load("spaces/multimodalart/rudalle")
diffusion = gr.Interface.load("spaces/multimodalart/diffusion")
print(diffusion)  # prints the loaded interface object at start-up
vqgan = gr.Interface.load("spaces/multimodalart/vqgan")

# Introductory text shown at the top of the page. Kept as a module-level
# constant so the multi-line literal does not fight with the UI indentation.
_INTRO_MARKDOWN = """
MindsEye Lite orchestrates multiple text-to-image Hugging Face Spaces in one convenient space, so you can try different models. This work carries the spirit of MindsEye Beta, a tool to run multiple models with a single UI, but adjusted to the current hardware limitations of Spaces. MindsEye Lite was created by @multimodalart, keep up with the latest multimodal ai art news here and consider supporting us on Patreon
"""


def _save_base64_pngs(encoded_images, temp_dir='./tmp'):
    """Decode base64-encoded PNG strings and save each one under *temp_dir*.

    Args:
        encoded_images: Iterable of strings holding base64 image data,
            optionally prefixed with ``data:image/png;base64,``.
        temp_dir: Directory the PNG files are written to; created if missing.

    Returns:
        List of file paths, one per input string, in input order.
    """
    # Loop-invariant: make sure the output directory exists once up front.
    os.makedirs(temp_dir, exist_ok=True)
    image_paths = []
    for image_str in encoded_images:
        image_str = image_str.replace("data:image/png;base64,", "")
        decoded_bytes = base64.decodebytes(image_str.encode("utf-8"))
        img = Image.open(io.BytesIO(decoded_bytes))
        # A fresh short UUID per image keeps file names from colliding.
        image_path = f'{temp_dir}/{shortuuid.uuid()}.png'
        img.save(image_path)
        image_paths.append(image_path)
    return image_paths


def text2image_latent(text, steps, width, height, images, diversity):
    """Run the Latent Diffusion Space and return saved image paths.

    All arguments are forwarded unchanged to the remote ``latent`` interface.

    Returns:
        List of paths to the PNG files written for the generated images.
    """
    results = latent(text, steps, width, height, images, diversity)
    # The second output is iterated as a collection of items whose first
    # element is the base64 image string (presumably a gallery payload —
    # confirm against the Space's output schema).
    return _save_base64_pngs(image[0] for image in results[1])


def text2image_rudalle(text, aspect, model):
    """Run the ruDALLE Space and return its first output wrapped in a list."""
    image = rudalle(text, aspect, model)[0]
    return [image]


def text2image_vqgan(text, width, height, style, steps, flavor):
    """Run the VQGAN+CLIP Space and return its output wrapped in a list."""
    results = vqgan(text, width, height, style, steps, flavor)
    return [results]


def text2image_diffusion(text, steps_diff, images_diff, weight, clip):
    """Run the Guided Diffusion Space and return saved image paths.

    All arguments are forwarded unchanged to the remote ``diffusion``
    interface; every item of its result is treated as a base64 image string.

    Returns:
        List of paths to the PNG files written for the generated images.
    """
    results = diffusion(text, steps_diff, images_diff, weight, clip)
    return _save_base64_pngs(results)


css_mt = {"margin-top": "1em"}

empty = gr.outputs.HTML()

# NOTE(review): the nesting below was reconstructed from a whitespace-flattened
# source (second Row placed as a sibling of the first inside the Column) —
# confirm it matches the intended layout.
with gr.Blocks() as mindseye:
    gr.Markdown(_INTRO_MARKDOWN)
    gr.Markdown("")
    text = gr.inputs.Textbox(
        placeholder="Type your prompt to generate an image",
        label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting'",
        default="A giant mecha robot in Rio de Janeiro, oil on canvas",
    )
    with gr.Column():
        # Input controls: one tab per model.
        with gr.Row():
            with gr.Tabs():
                with gr.TabItem("Latent Diffusion"):
                    gr.Markdown("Latent Diffusion is the state of the art of open source text-to-image models, superb in text synthesis. Sometimes struggles with complex prompts")
                    steps = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate",
                        default=45,
                        maximum=50,
                        minimum=1,
                        step=1,
                    )
                    width = gr.inputs.Slider(label="Width", default=256, step=32, maximum=256, minimum=32)
                    height = gr.inputs.Slider(label="Height", default=256, step=32, maximum=256, minimum=32)
                    images = gr.inputs.Slider(
                        label="Images - How many images you wish to generate",
                        default=2,
                        step=1,
                        minimum=1,
                        maximum=4,
                    )
                    diversity = gr.inputs.Slider(
                        label="Diversity scale - How different from one another you wish the images to be",
                        default=5.0,
                        minimum=1.0,
                        maximum=15.0,
                    )
                    get_image_latent = gr.Button("Generate Image", css=css_mt)
                with gr.TabItem("ruDALLE"):
                    gr.Markdown("ruDALLE is a replication of DALL-E 1 in the russian language. No worries, your prompts will be translated automatically to russian. In case you see an error, try again a few times")
                    aspect = gr.inputs.Radio(
                        label="Aspect Ratio",
                        choices=["Square", "Horizontal", "Vertical"],
                        default="Square",
                    )
                    model = gr.inputs.Dropdown(
                        label="Model",
                        choices=["Surrealism", "Realism", "Emoji"],
                        default="Surrealism",
                    )
                    get_image_rudalle = gr.Button("Generate Image", css=css_mt)
                with gr.TabItem("VQGAN+CLIP"):
                    gr.Markdown("VQGAN+CLIP is the most famous text-to-image generator. \nCan produce good artistic results")
                    width_vq = gr.inputs.Slider(label="Width", default=256, minimum=32, step=32, maximum=512)
                    height_vq = gr.inputs.Slider(label="Height", default=256, minimum=32, step=32, maximum=512)
                    style = gr.inputs.Dropdown(
                        label="Style - Hyper Fast Results is fast but compromises a bit of the quality",
                        choices=[
                            "Default",
                            "Balanced",
                            "Detailed",
                            "Consistent Creativity",
                            "Realistic",
                            "Smooth",
                            "Subtle MSE",
                            "Hyper Fast Results",
                        ],
                        default="Hyper Fast Results",
                    )
                    steps_vq = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate. All styles that are not Hyper Fast need at least 200 steps",
                        default=50,
                        maximum=300,
                        minimum=1,
                        step=1,
                    )
                    flavor = gr.inputs.Dropdown(
                        label="Flavor - pick a flavor for the style of the images, based on the images below",
                        choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu"],
                    )
                    get_image_vqgan = gr.Button("Generate Image", css=css_mt)
                with gr.TabItem("Guided Diffusion"):
                    gr.Markdown("Guided Diffusion models produce superb quality results. \nV-Diffusion is its latest implementation")
                    steps_diff = gr.inputs.Slider(
                        label="Steps - more steps can increase quality but will take longer to generate",
                        default=40,
                        maximum=80,
                        minimum=1,
                        step=1,
                    )
                    images_diff = gr.inputs.Slider(
                        label="Number of images in parallel",
                        default=2,
                        maximum=4,
                        minimum=1,
                        step=1,
                    )
                    weight = gr.inputs.Slider(
                        label="Weight - how closely the image should resemble the prompt",
                        default=5,
                        maximum=15,
                        minimum=0,
                        step=1,
                    )
                    clip = gr.inputs.Checkbox(label="CLIP Guided - improves coherence with complex prompts, makes it slower")
                    get_image_diffusion = gr.Button("Generate Image", css=css_mt)
        # Output area shared by all four models.
        with gr.Row():
            with gr.Tabs():
                #with gr.TabItem("Image output"):
                #    image = gr.outputs.Image()
                with gr.TabItem("Gallery output"):
                    gallery = gr.Gallery(label="Individual images")

    # Every button sends the shared prompt plus its own tab's controls to the
    # matching adapter and renders the returned images in the gallery.
    get_image_latent.click(text2image_latent, inputs=[text, steps, width, height, images, diversity], outputs=gallery)
    get_image_rudalle.click(text2image_rudalle, inputs=[text, aspect, model], outputs=gallery)
    get_image_vqgan.click(text2image_vqgan, inputs=[text, width_vq, height_vq, style, steps_vq, flavor], outputs=gallery)
    get_image_diffusion.click(text2image_diffusion, inputs=[text, steps_diff, images_diff, weight, clip], outputs=gallery)

mindseye.launch()