import threading

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, AutoencoderKL


# Heading text; passed to gr.Interface as `title` further down in this file.
title = "Fast Text-to-Image Generation on CPU"
# Introductory text; passed to gr.Interface as `description`.
# NOTE: this is a regular (non-raw) triple-quoted string, so the `\n` at the
# end of the "Model:" line is a newline escape and adds an extra line break
# between the model link and the paper link.
description = """
This Space uses the sdxs-512-0.9 model which has the ability to generate high quality images in a fraction of the time of previous methods.

This Space demos the model on an inexpensive CPU, where it can  generate images in just a few seconds. When on a GPU this model can generate up to 100 images per second. 

Model: https://huggingface.co/IDKiro/sdxs-512-0.9\n
Paper: https://arxiv.org/pdf/2403.16627.pdf


"""


# Pipelines already loaded by this process, keyed by (repo, dtype).
# Loading the weights is far more expensive than the single inference step,
# so it must happen once per process rather than once per request.
_PIPELINE_CACHE = {}

# Guards both the lazy load and inference: the cached pipeline is shared by
# every request, so calls into it are serialised instead of overlapping.
_PIPELINE_LOCK = threading.Lock()


def _get_pipeline(repo, weight_type):
    """Return the pipeline for *repo*, loading it on first use only.

    Callers are expected to hold ``_PIPELINE_LOCK``.
    """
    key = (repo, weight_type)
    pipe = _PIPELINE_CACHE.get(key)
    if pipe is None:
        pipe = StableDiffusionPipeline.from_pretrained(repo, torch_dtype=weight_type)
        # Optional tweaks, applied once at load time:
        #   pipe.vae = AutoencoderKL.from_pretrained("IDKiro/sdxs-512-0.9/vae_large")  # use original VAE
        #   pipe.to("cuda")  # add this in only for gpu inference
        _PIPELINE_CACHE[key] = pipe
    return pipe


def generate_image(prompt):
    """Generate one image for *prompt* with the sdxs-512-0.9 pipeline.

    The pipeline is loaded on the first call and reused afterwards, so only
    the first request pays the model-loading cost.

    Args:
        prompt: Text prompt forwarded unchanged to the pipeline.

    Returns:
        The first entry of the pipeline output's ``images`` list.
    """
    repo = "IDKiro/sdxs-512-0.9"
    weight_type = torch.float32

    with _PIPELINE_LOCK:
        pipe = _get_pipeline(repo, weight_type)

        # Ensure using the same inference steps as the loaded model and CFG set to 0.
        image = pipe(
            prompt,
            num_inference_steps=1,
            guidance_scale=0,
            generator=torch.Generator(device="cpu"),  # change to 'cuda' for gpu inference
        ).images[0]

    return image


# Assemble the Gradio UI: a single prompt textbox in, a single image out.
prompt_input = gr.Textbox(label="Text Prompt", placeholder="Type your prompt here...")
image_output = gr.Image(label="Generated Image")

iface_generate_image = gr.Interface(
    fn=generate_image,
    inputs=[prompt_input],
    outputs=image_output,
    title=title,
    description=description,
    allow_flagging="never",  # no flagging controls in the UI
)

# Serve the interface.
iface_generate_image.launch()