import os

import gradio as gr
import torch
import modin.pandas as pd
from diffusers import DiffusionPipeline

# Model setup: load the RealVisXL base pipeline and the SDXL refiner once at
# import time so every request served by the UI reuses the same weights.

# The CUDA caching allocator reads its configuration from the
# PYTORCH_CUDA_ALLOC_CONF environment variable ("key:value" syntax); assigning
# a Python variable of that name has no effect. Export it before any CUDA
# memory is allocated, and use setdefault so a value supplied by the
# deployment environment still wins.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:6000")

device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    torch.cuda.empty_cache()
    # Base model in half precision, moved to the GPU.
    pipe = DiffusionPipeline.from_pretrained(
        "SG161222/RealVisXL_V2.0",
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    )
    pipe.enable_xformers_memory_efficient_attention()
    pipe = pipe.to(device)
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    # Release cached blocks left over from loading the base model before the
    # refiner weights are loaded.
    torch.cuda.empty_cache()
    # Refiner in half precision; it is not moved with .to(device) because
    # sequential CPU offload is enabled on it instead.
    refiner = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        use_safetensors=True,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    refiner.enable_xformers_memory_efficient_attention()
    refiner.enable_sequential_cpu_offload()
    # NOTE(review): compiling with fullgraph=True on a module that is also
    # sequentially offloaded may not be supported -- confirm on target hardware.
    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
else:
    # CPU fallback: default precision, no xformers, no offloading.
    pipe = DiffusionPipeline.from_pretrained("SG161222/RealVisXL_V2.0", use_safetensors=True)
    pipe = pipe.to(device)
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True)
    refiner = refiner.to(device)
    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)

def genie(prompt, negative_prompt, height, width, scale, steps, seed, prompt_2, negative_prompt_2, high_noise_frac):
    """Generate one image with the base pipeline and finish it with the refiner.

    Args:
        prompt: Positive prompt passed to both pipelines.
        negative_prompt: Negative prompt passed to both pipelines.
        height: Output height passed to the base pipeline.
        width: Output width passed to the base pipeline.
        scale: Guidance scale for the base pipeline.
        steps: Number of inference steps for the base pipeline.
        seed: Random seed; ``0`` means "do not seed" and leaves the
            pipeline to draw its own noise.
        prompt_2: Secondary positive prompt passed to both pipelines.
        negative_prompt_2: Secondary negative prompt passed to both pipelines.
        high_noise_frac: Value forwarded to the refiner as ``denoising_start``.

    Returns:
        The first image produced by the refiner pipeline.
    """
    # The previous code called np.random.seed(0) here, but numpy is never
    # imported (NameError) and that call returns None anyway, so spell out the
    # intended "no generator" case directly.
    generator = None if seed == 0 else torch.manual_seed(seed)

    # The base pipeline returns latents rather than decoded images so they can
    # be handed straight to the refiner.
    int_image = pipe(
        prompt,
        prompt_2=prompt_2,
        negative_prompt_2=negative_prompt_2,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=steps,
        guidance_scale=scale,
        num_images_per_prompt=1,
        generator=generator,
        output_type="latent",
    ).images

    image = refiner(
        prompt=prompt,
        prompt_2=prompt_2,
        negative_prompt=negative_prompt,
        negative_prompt_2=negative_prompt_2,
        image=int_image,
        denoising_start=high_noise_frac,
    ).images[0]
    return image
    
# Build the web UI around genie() and start serving it. The input components
# are listed in the same order as genie()'s positional parameters.
demo = gr.Interface(
    fn=genie,
    inputs=[
        gr.Textbox(label='Positive Prompt. 77 Token Limit.'),
        gr.Textbox(label='Negative Prompt.'),
        gr.Slider(512, 1024, 768, step=128, label='Height'),
        gr.Slider(512, 1024, 768, step=128, label='Width'),
        gr.Slider(1, 15, 7, label='Guidance Scale'),
        gr.Slider(25, maximum=50, value=25, step=1, label='Number of Iterations'),
        # Seed: minimum is 1, so the "unseeded" value 0 is never sent by the UI.
        gr.Slider(minimum=1, step=1, maximum=999999999999999999, randomize=True),
        gr.Textbox(label='Embedded Prompt'),
        gr.Textbox(label='Embedded Negative Prompt'),
        gr.Slider(minimum=.7, maximum=.99, value=.95, step=.01, label='Refiner Denoise Start %'),
    ],
    outputs='image',
    title=" 📷 Realistic Vision XL V2.0 Demo by SG161222 📷",
    # Report the hardware actually selected at startup instead of always
    # claiming CPU.
    description="The model is still in the training phase. This is not the final version and may contain artifacts and perform poorly in some cases. Currently running on <b>" + ("GPU" if device == "cuda" else "CPU") + "</b>",
    article="Demo prompt template below to get an example of the models results:<br><br><b>Positive prompt:</b> dark shot, photo of cute 24 y.o blonde woman, perfect eyes, skin moles, short hair, looks at viewer, cinematic shot, hard shadows<br><br><b>Negative prompt:</b> (worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth <br><br> Iteration Steps: 25-40, Denoising strength: 0.95-0.99, CFG scale: 7, Seed: 4271781772<br><br> <b>WARNING:</b> Be patient, as generation is Slow.<br>65s/Iteration. Expected Generation Time is 25-40mins an image for 25-40 iterations respectively. This model is capable of producing mild NSFW images",
)
demo.launch(debug=True, max_threads=80)