from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderTiny
from compel import Compel, ReturnedEmbeddingsType
import torch
import os

# Optional dependency: Intel Extension for PyTorch. The import is best-effort:
# any failure is swallowed and the script carries on without it. The `ipex`
# name is not referenced elsewhere in the visible code, so the import is
# presumably wanted only for its side effects — TODO confirm.
# NOTE(review): a bare `except:` also swallows SystemExit/KeyboardInterrupt;
# narrow it only after confirming what importing this package can raise.
try:
    import intel_extension_for_pytorch as ipex
except:
    pass

from PIL import Image
import numpy as np
import gradio as gr
import psutil


# Runtime switches read from the environment; an unset variable yields None.
# SAFETY_CHECKER is later compared against the exact string "True", whereas
# TORCH_COMPILE is only tested for truthiness (any non-empty value enables it).
SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Backend probes. The hasattr() guards keep this working on torch builds that
# do not ship the `mps` / `xpu` modules at all.
# check if MPS is available OSX only M1/M2/M3 chips
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()

# Default selection: CUDA first, then XPU, then CPU, in half precision.
# `device` is the final execution device; `torch_device` is the device used
# for the first (dtype-casting) move of the pipeline.
device = torch.device(
    "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
)
torch_device = device
torch_dtype = torch.float16

# MPS takes precedence over the selection above: the pipeline is first cast
# on the CPU in float32 and only then moved to the "mps" device.
if mps_available:
    device = torch.device("mps")
    torch_device = "cpu"
    torch_dtype = torch.float32

# Report the configuration only after the MPS override so that the logged
# device is the one that is actually used.
print(f"SAFETY_CHECKER: {SAFETY_CHECKER}")
print(f"TORCH_COMPILE: {TORCH_COMPILE}")
print(f"device: {device}")

model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Only the exact string "True" keeps the default loading path; every other
# value (including unset) requests the pipeline with `safety_checker=None`.
pipe = (
    DiffusionPipeline.from_pretrained(model_id)
    if SAFETY_CHECKER == "True"
    else DiffusionPipeline.from_pretrained(model_id, safety_checker=None)
)

# Replace the checkpoint's scheduler with an LCM scheduler built from the
# same scheduler config.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Two-step placement: cast/move via `torch_device` first, then move to the
# final `device` (the two only differ on the MPS path).
pipe.to(device=torch_device, dtype=torch_dtype)
pipe.to(device)
pipe.unet.to(memory_format=torch.channels_last)

# Enable attention slicing when psutil reports less than 64 GiB of total RAM.
if psutil.virtual_memory().total < 64 * 2**30:
    pipe.enable_attention_slicing()

if TORCH_COMPILE:
    # Same compile settings for both the UNet and the VAE.
    _compile_kwargs = {"mode": "reduce-overhead", "fullgraph": True}
    pipe.unet = torch.compile(pipe.unet, **_compile_kwargs)
    pipe.vae = torch.compile(pipe.vae, **_compile_kwargs)

    # Single-step throwaway generation, presumably so that compilation is
    # triggered at startup rather than on the first user request.
    pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)

# Load LCM LoRA
# Loads the `lcm_sdxl_lora.safetensors` weights from `lcm-sd/lcm-sdxl-lora`
# into the pipeline. HF_TOKEN (None when the env var is unset) is forwarded as
# the auth token.
# NOTE(review): newer Hugging Face libraries use `token` instead of
# `use_auth_token` — confirm against the diffusers version this app pins.
pipe.load_lora_weights(
    "lcm-sd/lcm-sdxl-lora",
    weight_name="lcm_sdxl_lora.safetensors",
    #adapter_name="lcm",
    use_auth_token=HF_TOKEN,
)

# The remainder of this section is disabled code for mixing in a second
# ("papercut") LoRA; re-enabling it also requires the commented-out
# `adapter_name="lcm"` argument above so both adapters can be referenced by name.
## Load papercut LoRA
#pipe.load_lora_weights(
#    "TheLastBen/Papercut_SDXL",
#    weight_name="papercut.safetensors",
#    adapter_name="papercut",
#)

# Mix the LoRAs
#pipe.set_adapters(["lcm", "papercut"], adapter_weights=[1.0, 0.8])

# Prompt processor built over both of the pipeline's tokenizer/text-encoder
# pairs. predict() calls it with the raw prompt and unpacks the result into
# (prompt_embeds, pooled_prompt_embeds) for the pipeline call.
# `requires_pooled` is given per encoder, in the same order as the lists above
# it: False for the first encoder, True for the second.
compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)


def predict(
    prompt, guidance, steps, seed=1231231, progress=gr.Progress(track_tqdm=True)
):
    """Generate a single 1024x1024 image for ``prompt``.

    Uses the module-level ``compel_proc`` to turn the prompt into embeddings
    and the module-level ``pipe`` to run the diffusion pipeline.

    Args:
        prompt: Prompt text handed to ``compel_proc``.
        guidance: Value forwarded to the pipeline as ``guidance_scale``.
        steps: Number of inference steps; coerced to ``int`` before use.
        seed: Seed for the torch RNG; coerced to ``int`` before use.
        progress: Gradio progress tracker. It is not used in the body; it is
            presumably declared so Gradio mirrors tqdm progress in the UI.

    Returns:
        The first image of the pipeline output (requested as ``"pil"``).

    Raises:
        gr.Error: If the pipeline output carries an ``nsfw_content_detected``
            entry that is truthy for the first image.
    """
    # UI sliders can deliver whole numbers as floats (e.g. 4.0); the step count
    # and the seed must be integers by the time they reach the pipeline/RNG.
    steps = int(steps)
    seed = int(seed)

    # Seed first, then encode: the returned generator is passed to the pipeline
    # so the same seed reproduces the same image.
    generator = torch.manual_seed(seed)
    prompt_embeds, pooled_prompt_embeds = compel_proc(prompt)

    results = pipe(
        prompt_embeds=prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        generator=generator,
        num_inference_steps=steps,
        guidance_scale=guidance,
        width=1024,
        height=1024,
        output_type="pil",
    )

    # Not every pipeline output has an NSFW field, so test for the key before
    # reading it; a missing key is treated as "nothing detected".
    if "nsfw_content_detected" in results and results.nsfw_content_detected[0]:
        raise gr.Error("NSFW content detected.")
    return results.images[0]


# Page-level CSS: horizontally centers the `#container` column (max 50rem wide)
# and the `#intro` text block (max 32rem wide, centered text).
css = """
#container{
    margin: 0 auto;
    max-width: 50rem;
}
#intro{
    max-width: 32rem;
    text-align: center;
    margin: 0 auto;
}
"""
# UI definition. Components are declared in the order they should appear.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        # Title and short description (styled through the `#intro` rule above).
        gr.Markdown(
            """# Ultra-Fast SDXL with Latent Consistency LoRA
            In this Space, SDXL is loaded with a latent consistency LoRA, giving it the super power of doing inference in as little as 4 steps. [Learn more on our blog](#) or [technical report](#).
            """,
            elem_id="intro",
        )
        # Prompt box and "Generate" button placed in the same row, with the
        # prompt given five times the button's `scale`.
        with gr.Row():
            with gr.Row():
                prompt = gr.Textbox(
                    placeholder="Insert your prompt here:", value="papercut style of a cute monster", scale=5, container=False
                )
                generate_bt = gr.Button("Generate", scale=1)
        
        # Output component that receives the image returned by predict().
        image = gr.Image(type="filepath")
        # Generation parameters, collapsed by default; each one is passed to
        # predict() as the argument of the same name.
        with gr.Accordion("Advanced options", open=False):
            guidance = gr.Slider(
                label="Guidance", minimum=0.0, maximum=5, value=0.3, step=0.001
            )
            steps = gr.Slider(label="Steps", value=4, minimum=2, maximum=10, step=1)
            seed = gr.Slider(
                randomize=True, minimum=0, maximum=12013012031030, label="Seed", step=1
            )
        # Static usage snippet shown to the user; it is display text only and
        # is never executed by this script.
        with gr.Group():
            gr.Markdown('''## Using it with `diffusers`
            ```py
            from diffusers import DiffusionPipeline, LCMScheduler
            pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0").to("cuda") 
            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
            pipe.load_lora_weights("lcm-sd/lcm-sdxl-lora")

            results = pipe(
                prompt="The spirit of a tamagotchi wandering in the city of Vienna",
                num_inference_steps=4,
                guidance_scale=0.5,
            )
            results.images[0]
            ```
            ''')
            
        # Wire the button: the list order must match predict()'s positional
        # parameters (prompt, guidance, steps, seed).
        inputs = [prompt, guidance, steps, seed]
        generate_bt.click(fn=predict, inputs=inputs, outputs=image)

# Enable the request queue and start the app. Both calls run at import time
# (there is no `if __name__ == "__main__":` guard).
demo.queue()
demo.launch()