"""Gradio demo for SDXL-Lightning text-to-image generation.

Builds a small web UI with a prompt box and a step-count selector, runs the
selected SDXL-Lightning checkpoint, passes the result through the Stable
Diffusion safety checker, and shows the generated image.
"""

import gradio as gr
import torch
import spaces
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.image_processor import VaeImageProcessor
from transformers import CLIPImageProcessor
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

device = "cuda"
dtype = torch.float16
base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "ByteDance/SDXL-Lightning"

# UI label -> (checkpoint file name inside `repo`, number of inference steps).
opts = {
    "1 Step": ("sdxl_lightning_1step_unet_x0.safetensors", 1),
    "2 Steps": ("sdxl_lightning_2step_unet.safetensors", 2),
    "4 Steps": ("sdxl_lightning_4step_unet.safetensors", 4),
    "8 Steps": ("sdxl_lightning_8step_unet.safetensors", 8),
}


# Inference function.
@spaces.GPU()
def generate(prompt, option, progress=gr.Progress()):
    """Generate one image for ``prompt`` with the checkpoint named by ``option``.

    Args:
        prompt: Text prompt passed to the SDXL pipeline.
        option: A key of ``opts`` selecting the checkpoint and step count.
        progress: Gradio progress tracker, updated once per inference step.

    Returns:
        A PIL image. Any image flagged by the safety checker has been
        replaced by the checker before it is returned.

    Raises:
        KeyError: If ``option`` is not a key of ``opts``.
    """
    print(prompt, option)
    ckpt, step = opts[option]
    progress(0, desc="Initializing the model")

    # Main pipeline: build the UNet from the base config, then load the
    # selected Lightning weights into it.
    unet = UNet2DConditionModel.from_config(base, subfolder="unet")
    pipe = StableDiffusionXLPipeline.from_pretrained(
        base, unet=unet, torch_dtype=dtype, variant="fp16"
    ).to(device, dtype)
    pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
    # The 1-step checkpoint uses "sample" prediction; the others use "epsilon".
    pipe.scheduler = EulerDiscreteScheduler.from_config(
        pipe.scheduler.config,
        timestep_spacing="trailing",
        prediction_type="sample" if step == 1 else "epsilon",
    )

    # Safety checker.
    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    ).to(device, dtype)
    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
    image_processor = VaeImageProcessor(vae_scale_factor=8)

    def inference_callback(p, i, t, kwargs):
        # Report (completed steps, total steps) to the UI after each step.
        progress((i + 1, step))
        return kwargs

    # Inference loop.
    progress((0, step))
    results = pipe(
        prompt,
        num_inference_steps=step,
        guidance_scale=0,
        callback_on_step_end=inference_callback,
        output_type="pt",
    )

    # Safety check.
    # The pipeline's "pt" output has already been post-processed into the
    # [0, 1] range, so denormalization is switched off here; the processor's
    # default would apply it a second time and wash the image out.
    skip_denormalize = [False] * results.images.shape[0]
    feature_extractor_input = image_processor.postprocess(
        results.images, output_type="pil", do_denormalize=skip_denormalize
    )
    safety_checker_input = feature_extractor(feature_extractor_input, return_tensors="pt")
    pixel_values = safety_checker_input.pixel_values.to(device, dtype)
    images, has_nsfw_concept = safety_checker(
        images=results.images, clip_input=pixel_values
    )
    if has_nsfw_concept[0]:
        print(f"Safety checker triggered on prompt: {prompt}")

    # Hand the UI a PIL image instead of a raw GPU tensor: gr.Image accepts
    # PIL images, NumPy arrays and file paths.
    return image_processor.postprocess(
        images, output_type="pil", do_denormalize=skip_denormalize
    )[0]


with gr.Blocks(css="style.css") as demo:
    # NOTE(review): these strings contain no HTML tags, only text surrounded
    # by blank lines; markup may have been lost upstream -- confirm.
    gr.HTML(
        "\n\nSDXL-Lightning\n\n"
        + "\n\nLightning-fast text-to-image generation\n\n"
        + "\n\nhttps://huggingface.co/ByteDance/SDXL-Lightning\n\n"
    )
    with gr.Row():
        prompt = gr.Textbox(
            label="Text prompt",
            scale=8,
        )
        option = gr.Dropdown(
            label="Inference steps",
            # Derived from `opts` so the menu cannot drift from the
            # checkpoint table.
            choices=list(opts),
            value="4 Steps",
            interactive=True,
        )
        submit = gr.Button(
            scale=1,
            variant="primary",
        )

    img = gr.Image(label="SDXL-Lightning Generated Image")

    # Pressing Enter in the prompt box and clicking the button both generate.
    prompt.submit(
        fn=generate,
        inputs=[prompt, option],
        outputs=img,
    )
    submit.click(
        fn=generate,
        inputs=[prompt, option],
        outputs=img,
    )

    gr.Examples(
        fn=generate,
        examples=[
            ["An owl perches quietly on a twisted branch deep within an ancient forest.", "1 Step"],
            ["A lion in the galaxy, octane render", "2 Steps"],
            ["A dolphin leaps through the waves, set against a backdrop of bright blues and teal hues.", "2 Steps"],
            ["A girl smiling", "4 Steps"],
            ["An astronaut riding a horse", "4 Steps"],
            ["A fish on a bicycle, colorful art", "4 Steps"],
            ["A close-up of an Asian lady with sunglasses.", "4 Steps"],
            ["Rabbit portrait in a forest, fantasy", "4 Steps"],
            ["A panda swimming", "4 Steps"],
            ["Man portrait, ethereal", "8 Steps"],
        ],
        inputs=[prompt, option],
        outputs=img,
        cache_examples=False,
    )

    gr.HTML(
        "\n\nThis demo is built together by the community\n\n"
    )

demo.queue().launch()