radames's picture
radames HF staff
Update app.py
b464ed9
from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
from compel import Compel, ReturnedEmbeddingsType
import torch
import os
try:
import intel_extension_for_pytorch as ipex
except:
pass
from PIL import Image
import numpy as np
import gradio as gr
import psutil
from sfast.compilers.stable_diffusion_pipeline_compiler import (
compile,
CompilationConfig,
)
SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# check if MPS is available OSX only M1/M2/M3 chips
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
device = torch.device(
"cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
)
torch_device = device
torch_dtype = torch.float16
print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
print(f"device: {device}")
if mps_available:
device = torch.device("mps")
torch_device = "cpu"
torch_dtype = torch.float32
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
if SAFETY_CHECKER == "True":
pipe = DiffusionPipeline.from_pretrained(model_id)
else:
pipe = DiffusionPipeline.from_pretrained(model_id, safety_checker=None)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights(
"latent-consistency/lcm-lora-sdxl",
use_auth_token=HF_TOKEN,
)
if device.type != "mps":
pipe.unet.to(memory_format=torch.channels_last)
pipe.to(device=torch_device, dtype=torch_dtype).to(device)
# Load LCM LoRA
config = CompilationConfig.Default()
config.enable_xformers = True
config.enable_triton = True
config.enable_cuda_graph = True
pipe = compile(pipe, config=config)
compel_proc = Compel(
tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True],
)
def predict(
prompt,
guidance,
steps,
seed=1231231,
randomize_bt=False,
progress=gr.Progress(track_tqdm=True),
):
if randomize_bt:
seed = np.random.randint(0, 2**32 - 1)
generator = torch.manual_seed(seed)
prompt_embeds, pooled_prompt_embeds = compel_proc(prompt)
results = pipe(
prompt_embeds=prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
generator=generator,
num_inference_steps=steps,
guidance_scale=guidance,
width=1024,
height=1024,
# original_inference_steps=params.lcm_steps,
output_type="pil",
)
nsfw_content_detected = (
results.nsfw_content_detected[0]
if "nsfw_content_detected" in results
else False
)
if nsfw_content_detected:
raise gr.Error("NSFW content detected.")
return results.images[0], seed
css = """
#container{
margin: 0 auto;
max-width: 40rem;
}
#intro{
max-width: 100%;
text-align: center;
margin: 0 auto;
}
"""
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="container"):
gr.Markdown(
"""# SDXL in 4 steps with Latent Consistency LoRAs
SDXL is loaded with a LCM-LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more on our blog](https://huggingface.co/blog/lcm_lora) or [technical report](https://huggingface.co/papers/2311.05556).
""",
elem_id="intro",
)
with gr.Row():
with gr.Row():
prompt = gr.Textbox(
placeholder="Insert your prompt here:", scale=5, container=False
)
generate_bt = gr.Button("Generate", scale=1)
image = gr.Image(type="filepath")
with gr.Accordion("Advanced options", open=False):
guidance = gr.Slider(
label="Guidance", minimum=0.0, maximum=5, value=0.3, step=0.001
)
steps = gr.Slider(label="Steps", value=4, minimum=2, maximum=10, step=1)
with gr.Row():
seed = gr.Slider(
randomize=True,
minimum=0,
maximum=12013012031030,
label="Seed",
step=1,
scale=5,
)
with gr.Group():
randomize_bt = gr.Checkbox(label="Randomize", value=False)
random_seed = gr.Textbox(show_label=False)
with gr.Accordion("Run with diffusers"):
gr.Markdown(
"""## Running LCM-LoRAs it with `diffusers`
```bash
pip install diffusers==0.23.0
```
```py
from diffusers import DiffusionPipeline, LCMScheduler
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl") #yes, it's a normal LoRA
results = pipe(
prompt="The spirit of a tamagotchi wandering in the city of Vienna",
num_inference_steps=4,
guidance_scale=0.0,
)
results.images[0]
```
"""
)
inputs = [prompt, guidance, steps, seed, randomize_bt]
generate_bt.click(fn=predict, inputs=inputs, outputs=[image, random_seed])
demo.queue(api_open=False)
demo.launch(show_api=False)