"""Gradio demo for the aMUSEd text-to-image model.

Loads the ``amused/amused-512`` pipeline once at import time, builds a small
Blocks UI (prompt, negative prompt, guidance scale) and launches it.
"""

from concurrent.futures import ThreadPoolExecutor
import uuid

import gradio as gr
from PIL import Image
import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline


def save_image(img):
    """Save *img* as a PNG under a random UUID file name.

    Args:
        img: Object exposing ``save(path)`` (presumably a ``PIL.Image.Image``).

    Returns:
        The generated file name, relative to the current working directory.
    """
    unique_name = str(uuid.uuid4()) + ".png"
    img.save(unique_name)
    return unique_name


def save_images(image_array):
    """Save every image in *image_array* using a thread pool.

    Args:
        image_array: Iterable of objects accepted by :func:`save_image`.

    Returns:
        List of file names, in the same order as the input images
        (``Executor.map`` preserves input order).
    """
    with ThreadPoolExecutor() as executor:
        return list(executor.map(save_image, image_array))


device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): float16 weights are requested even when ``device`` is "cpu";
# confirm half precision is supported on CPU for this pipeline.
pipe = DiffusionPipeline.from_pretrained(
    "amused/amused-512",
    variant="fp16",
    torch_dtype=torch.float16,
).to(device)

# Compel turns prompt strings into embeddings. ``requires_pooled=True`` makes
# each call return a (conditioning, pooled) pair, which ``infer`` unpacks.
compel = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=True,
    truncate_long_prompts=False,
)


def infer(prompt, negative="", scale=10, progress=gr.Progress(track_tqdm=True)):
    """Generate four images for *prompt* with the aMUSEd pipeline.

    Args:
        prompt: Text describing the desired image.
        negative: Text describing what to avoid; ``None`` is treated as "".
        scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker; ``track_tqdm=True`` is requested so
            the pipeline's tqdm progress is surfaced in the UI.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    print("Generating:")

    # Normalize missing text to an empty string before handing it to Compel.
    prompt = prompt or ""
    negative = negative or ""

    conditioning, pooled = compel(prompt)
    negative_conditioning, negative_pooled = compel(negative)

    # Prompts are not truncated (``truncate_long_prompts=False``), so the two
    # conditioning tensors are padded to a common length before use.
    conditioning, negative_conditioning = compel.pad_conditioning_tensors_to_same_length(
        [conditioning, negative_conditioning]
    )

    images = pipe(
        prompt_embeds=pooled,
        encoder_hidden_states=conditioning,
        negative_prompt_embeds=negative_pooled,
        negative_encoder_hidden_states=negative_conditioning,
        guidance_scale=scale,
        num_images_per_prompt=4,
        temperature=(3, 1),
    ).images

    print("Done Generating!")
    print("Num Images:", len(images))
    return images


# Each row is [prompt, negative prompt, guidance scale]; the last two are left
# as None so the examples only carry a prompt value.
examples = [
    [
        'A serious capybara at work, wearing a suit',
        None,
        None,
    ],
    [
        'A pikachu fine dining with a view to the Eiffel Tower',
        None,
        None,
    ],
    [
        'A mecha robot in a favela in expressionist style',
        None,
        None,
    ],
    [
        'an insect robot preparing a delicious meal',
        None,
        None,
    ],
    [
        "A small cabin on top of a snowy mountain in the style of Disney, artstation",
        None,
        None,
    ],
]

css = """
h1 {
  text-align: center;
}

#component-0 {
  max-width: 730px;
  margin: auto;
}
"""

block = gr.Blocks(css=css)

with block:
    # The title is wrapped in <h1> so the ``h1`` rule in ``css`` has an element
    # to match, and the description in <p> so the two render as separate blocks.
    gr.HTML(
        """
        <h1>aMUSEd: Efficient Text-to-Image Model</h1>
        <p>aMUSEd is an open-source, lightweight masked image model for text-to-image generation based on MUSE focused on fast image generation.</p>
        """
    )
    with gr.Group():
        # Layout options are passed to the constructors; the chained
        # ``.style(...)`` form is deprecated in Gradio.
        with gr.Row(elem_id="prompt-container", equal_height=True):
            with gr.Column():
                text = gr.Textbox(
                    label="Enter your prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your prompt",
                    container=False,
                )
                negative = gr.Textbox(
                    label="Enter your negative prompt",
                    show_label=False,
                    max_lines=1,
                    placeholder="Enter your negative prompt",
                    container=False,
                    value="low quality, ugly, deformed",
                )
            btn = gr.Button("Generate image", scale=0)

        gallery = gr.Gallery(
            label="Generated images",
            show_label=False,
            columns=2,
        )

    with gr.Accordion("Advanced settings", open=False):
        guidance_scale = gr.Slider(
            label="Guidance Scale", minimum=0, maximum=20, value=10, step=0.1
        )

    ex = gr.Examples(
        examples=examples,
        fn=infer,
        inputs=[text, negative, guidance_scale],
        outputs=gallery,
        cache_examples=False,
    )
    # Replace the examples table header with a single empty label.
    ex.dataset.headers = [""]

    # Pressing Enter in either text box or clicking the button runs inference.
    text.submit(infer, inputs=[text, negative, guidance_scale], outputs=gallery)
    negative.submit(infer, inputs=[text, negative, guidance_scale], outputs=gallery)
    btn.click(infer, inputs=[text, negative, guidance_scale], outputs=gallery)

block.launch()