# SDXL-Lightning

import gradio as gr
import torch
import spaces
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.image_processor import VaeImageProcessor
from transformers import CLIPImageProcessor
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Device and precision that the pipeline and safety checker are moved to.
device, dtype = "cuda", torch.float16

# Hugging Face Hub repo ids: `base` supplies the SDXL pipeline, `repo` holds
# the distilled UNet checkpoints listed in `opts`.
base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"

# Maps each dropdown label to (checkpoint filename inside `repo`, number of
# inference steps to run with that checkpoint).
opts = dict(
    zip(
        ("1 Step", "2 Steps", "4 Steps", "8 Steps"),
        (
            ("sdxl_lightning_1step_unet_x0.safetensors", 1),
            ("sdxl_lightning_2step_unet.safetensors", 2),
            ("sdxl_lightning_4step_unet.safetensors", 4),
            ("sdxl_lightning_8step_unet.safetensors", 8),
        ),
    )
)

# Inference function.
@spaces.GPU()
def generate(prompt, option, progress=gr.Progress()):
    """Generate one image for ``prompt`` with the selected SDXL-Lightning checkpoint.

    The SDXL pipeline, the distilled UNet weights and the safety checker are
    all loaded inside this function, i.e. on every call.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.
        option: Key of ``opts`` (e.g. ``"4 Steps"``) selecting the checkpoint
            file and the number of inference steps.
        progress: Gradio progress tracker; updated during model setup and
            after every denoising step.

    Returns:
        The first generated image as a PIL image, after it has been passed
        through the safety checker (which may have replaced its content).

    Raises:
        KeyError: If ``option`` is not a key of ``opts``.
    """
    print(prompt, option)
    ckpt, step = opts[option]

    progress(0, desc="Initializing the model")

    # Main pipeline: base SDXL whose UNet is rebuilt from config and then
    # filled with the selected checkpoint's weights.
    # `from_config` is given a config dict obtained from `load_config`;
    # passing the repo id straight to `from_config` is a deprecated code path.
    unet_config = UNet2DConditionModel.load_config(base, subfolder="unet")
    unet = UNet2DConditionModel.from_config(unet_config)
    pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet, torch_dtype=dtype, variant="fp16").to(device, dtype)
    pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
    # The 1-step checkpoint (the `_x0` file) is sampled with
    # prediction_type="sample"; every other checkpoint uses "epsilon".
    pipe.scheduler = EulerDiscreteScheduler.from_config(
        pipe.scheduler.config,
        timestep_spacing="trailing",
        prediction_type="sample" if step == 1 else "epsilon",
    )

    # Safety checker.
    safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to(device, dtype)
    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
    image_processor = VaeImageProcessor(vae_scale_factor=8)

    def inference_callback(p, i, t, kwargs):
        # Called by the pipeline after each step; `i` is the zero-based step
        # index. The callback kwargs must be handed back to the pipeline.
        progress((i + 1, step))
        return kwargs

    # Inference loop.
    progress((0, step))
    results = pipe(prompt, num_inference_steps=step, guidance_scale=0, callback_on_step_end=inference_callback, output_type="pt")

    # With output_type="pt" the pipeline has already post-processed the images
    # into the [0, 1] range. Disable denormalization when converting to PIL so
    # the pixel values are not rescaled a second time.
    skip_denormalize = [False] * len(results.images)

    # Safety check.
    feature_extractor_input = image_processor.postprocess(
        results.images, output_type="pil", do_denormalize=skip_denormalize
    )
    safety_checker_input = feature_extractor(feature_extractor_input, return_tensors="pt")
    pixel_values = safety_checker_input.pixel_values.to(device, dtype)
    images, has_nsfw_concept = safety_checker(
        images=results.images, clip_input=pixel_values
    )
    if has_nsfw_concept[0]:
        print(f"Safety checker triggered on prompt: {prompt}")

    # The checker hands back tensors; gr.Image needs a PIL image (or numpy
    # array / file path), so convert before returning.
    checked_images = image_processor.postprocess(
        images, output_type="pil", do_denormalize=skip_denormalize
    )
    return checked_images[0]

# UI layout, event wiring and app launch.
with gr.Blocks(css="style.css") as demo:
    # Page header: title, tagline and a link to the model card.
    gr.HTML(
        "<h1><center>SDXL-Lightning</center></h1>"
        "<p><center>Lightning-fast text-to-image generation</center></p>"
        "<p><center><a href='https://huggingface.co/ByteDance/SDXL-Lightning'>https://huggingface.co/ByteDance/SDXL-Lightning</a></center></p>"
    )

    with gr.Row():
        prompt = gr.Textbox(
            label="Text prompt",
            scale=8,
        )
        option = gr.Dropdown(
            label="Inference steps",
            # Derived from `opts` so the dropdown can never offer a label
            # that generate() is unable to look up.
            choices=list(opts),
            value="4 Steps",
            interactive=True,
        )
        submit = gr.Button(
            scale=1,
            variant="primary",
        )

    img = gr.Image(label="SDXL-Lightning Generated Image")

    # Pressing Enter in the textbox and clicking the button both run the
    # same generation with the same inputs and output.
    for trigger in (prompt.submit, submit.click):
        trigger(
            fn=generate,
            inputs=[prompt, option],
            outputs=img,
        )

    # Each example row is [prompt text, dropdown label].
    gr.Examples(
        fn=generate,
        examples=[
            ["An owl perches quietly on a twisted branch deep within an ancient forest.", "1 Step"],
            ["A lion in the galaxy, octane render", "2 Steps"],
            ["A dolphin leaps through the waves, set against a backdrop of bright blues and teal hues.", "2 Steps"],
            ["A girl smiling", "4 Steps"],
            ["An astronaut riding a horse", "4 Steps"],
            ["A fish on a bicycle, colorful art", "4 Steps"],
            ["A close-up of an Asian lady with sunglasses.", "4 Steps"],
            ["Rabbit portrait in a forest, fantasy", "4 Steps"],
            ["A panda swimming", "4 Steps"],
            ["Man portrait, ethereal", "8 Steps"],
        ],
        inputs=[prompt, option],
        outputs=img,
        cache_examples=False,
    )

    gr.HTML(
        "<p><small><center>This demo is built together by the community</center></small></p>"
    )

demo.queue().launch()