from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
import torch
import os

try:
    import intel_extension_for_pytorch as ipex
except ImportError:
    pass

from PIL import Image
import numpy as np
import gradio as gr
import psutil
import time

SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Check if MPS is available (macOS only, Apple Silicon M1/M2/M3 chips).
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
device = torch.device(
    "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
)
torch_device = device
torch_dtype = torch.float16  # fp16 by default; overridden to fp32 for MPS below

print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
print(f"TORCH_COMPILE: {TORCH_COMPILE}")
print(f"device: {device}")

if mps_available:
    device = torch.device("mps")
    torch_device = "cpu"
    torch_dtype = torch.float32

if SAFETY_CHECKER == "True":
    pipe = DiffusionPipeline.from_pretrained("Lykon/dreamshaper-7")
else:
    pipe = DiffusionPipeline.from_pretrained(
        "Lykon/dreamshaper-7", safety_checker=None
    )

pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
# Cast weights on torch_device first, then move to the final device
# (on MPS, the dtype cast happens on CPU before moving to "mps").
pipe.to(device=torch_device, dtype=torch_dtype).to(device)
pipe.unet.to(memory_format=torch.channels_last)
pipe.set_progress_bar_config(disable=True)

# Enable attention slicing if the machine has less than 64GB of RAM
# (checked via psutil).
if psutil.virtual_memory().total < 64 * 1024**3:
    pipe.enable_attention_slicing()

if TORCH_COMPILE:
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
    # Warmup pass to trigger compilation before serving requests.
    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

# Load the LCM LoRA and fuse it into the base weights.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
pipe.fuse_lora()


def predict(prompt, negative_prompt, guidance, steps, seed=1231231):
    generator = torch.manual_seed(seed)
    last_time = time.time()
    results = pipe(
        prompt=prompt,
        generator=generator,
        negative_prompt=negative_prompt,
        num_inference_steps=steps,
        guidance_scale=guidance,
        width=512,
        height=512,
        # original_inference_steps=params.lcm_steps,
        output_type="pil",
    )
    print(f"Pipe took {time.time() - last_time} seconds")
    nsfw_content_detected = (
        results.nsfw_content_detected[0]
        if "nsfw_content_detected" in results
        else False
    )
    if nsfw_content_detected:
        gr.Warning("NSFW content detected.")
        return Image.new("RGB", (512, 512))
    return results.images[0]


css = """
#container{
    margin: 0 auto;
    max-width: 40rem;
}
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        gr.Markdown(
            """# SD1.5 Latent Consistency LoRAs
SD1.5 is loaded with an LCM-LoRA, giving it the superpower of doing inference in as few as 4 steps.
[Learn more on our blog](#) or [technical report](#).
""",
            elem_id="intro",
        )
        prompt = gr.Textbox(
            placeholder="Insert your prompt here:", lines=4, container=False
        )
        negative_prompt = gr.Textbox(
            placeholder="Insert your negative prompt here:", lines=4, container=False
        )
        generate_bt = gr.Button("Generate", scale=1)
        image = gr.Image(type="filepath")
        with gr.Accordion("Advanced options", open=True):
            guidance = gr.Slider(
                label="Guidance", minimum=0.0, maximum=5, value=1.5, step=0.001
            )
            steps = gr.Slider(label="Steps", value=8, minimum=1, maximum=50, step=1)
            seed = gr.Slider(
                randomize=True, minimum=0, maximum=12013012031030, label="Seed", step=1
            )

        inputs = [prompt, negative_prompt, guidance, steps, seed]
        generate_bt.click(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        prompt.input(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        negative_prompt.input(
            fn=predict, inputs=inputs, outputs=image, show_progress=False
        )
        guidance.change(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        steps.change(fn=predict, inputs=inputs, outputs=image, show_progress=False)
        seed.change(fn=predict, inputs=inputs, outputs=image, show_progress=False)

demo.queue()
demo.launch()