import functools

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, AutoencoderKL

# Hugging Face Hub repository holding the one-step SDXS checkpoint.
MODEL_REPO = "IDKiro/sdxs-512-0.9"
# Precision the weights are loaded in; float32 is used for CPU inference.
WEIGHT_DTYPE = torch.float32

title = "Fast Text-to-Image Generation on CPU"

# Shown under the title in the UI. Built from adjacent literals (implicit
# concatenation) purely to keep source lines short; the resulting text is
# one string.
description = (
    " This Space uses the sdxs-512-0.9 model which has the ability to "
    "generate high quality images in a fraction of the time of previous "
    "methods. "
    "This Space demos the model on an inexpensive CPU, where it can generate "
    "images in just a few seconds. "
    "When on a GPU this model can generate up to 100 images per second. "
    "Model: https://huggingface.co/IDKiro/sdxs-512-0.9\n "
    "Paper: https://arxiv.org/pdf/2403.16627.pdf "
)


@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Build the Stable Diffusion pipeline once and reuse it afterwards.

    Loading the checkpoint is by far the most expensive step, so the result
    is memoized: the first call loads the weights, every later call returns
    the same pipeline object.
    """
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_REPO, torch_dtype=WEIGHT_DTYPE
    )
    # NOTE: optional tweaks left disabled by the original author:
    #   pipe.vae = AutoencoderKL.from_pretrained("IDKiro/sdxs-512-0.9/vae_large")  # use original VAE
    #   pipe.to("cuda")  # add this in only for gpu inference
    return pipe


def generate_image(prompt):
    """Generate a single image for a text prompt.

    Args:
        prompt: Text passed to the pipeline as the generation prompt.

    Returns:
        The first entry of the pipeline result's ``images`` list
        (presumably a PIL image, diffusers' default output type).
    """
    pipe = _load_pipeline()

    # Ensure using the same inference steps as the loaded model and CFG set
    # to 0.
    result = pipe(
        prompt,
        num_inference_steps=1,
        guidance_scale=0,
        # change to 'cuda' for gpu inference
        generator=torch.Generator(device="cpu"),
    )
    return result.images[0]


# Build the Gradio interface: one text box in, one image out.
iface_generate_image = gr.Interface(
    fn=generate_image,
    title=title,
    description=description,
    inputs=[
        gr.Textbox(label="Text Prompt", placeholder="Type your prompt here..."),
    ],
    outputs=gr.Image(label="Generated Image"),
    allow_flagging="never",
)

# Start the interface.
iface_generate_image.launch()