import os

import gradio as gr
import modin.pandas as pd
import torch
from diffusers import DiffusionPipeline
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

BASE_MODEL_ID = "SG161222/RealVisXL_V1.0"
REFINER_MODEL_ID = "stabilityai/stable-diffusion-xl-refiner-1.0"

if device == "cuda":
    # The CUDA caching allocator is configured through this environment
    # variable as an "option:value" string; binding a Python dict to a name
    # of the same spelling has no effect. It has to be in place before the
    # first CUDA allocation. setdefault keeps a value the operator exported.
    os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:6000")
    torch.cuda.empty_cache()

    # Base pipeline: half-precision weights, resident on the GPU.
    pipe = DiffusionPipeline.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    )
    pipe.enable_xformers_memory_efficient_attention()
    pipe = pipe.to(device)
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
    torch.cuda.empty_cache()

    # Refiner pipeline: half-precision weights with sequential CPU offload
    # enabled instead of an explicit .to(device).
    refiner = DiffusionPipeline.from_pretrained(
        REFINER_MODEL_ID,
        use_safetensors=True,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    refiner.enable_xformers_memory_efficient_attention()
    refiner.enable_sequential_cpu_offload()
    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
else:
    # CPU path: load both pipelines with their default dtype/variant.
    pipe = DiffusionPipeline.from_pretrained(BASE_MODEL_ID, use_safetensors=True)
    pipe = pipe.to(device)
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

    refiner = DiffusionPipeline.from_pretrained(REFINER_MODEL_ID, use_safetensors=True)
    refiner = refiner.to(device)
    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
def genie(prompt, negative_prompt, height, width, scale, steps, seed, prompt_2, negative_prompt_2, high_noise_frac):
    """Generate one image with the base pipeline, then pass it through the refiner.

    Args:
        prompt: Positive prompt, forwarded to both pipelines.
        negative_prompt: Negative prompt, forwarded to both pipelines.
        height: Output height passed to the base pipeline.
        width: Output width passed to the base pipeline.
        scale: Guidance scale passed to the base pipeline.
        steps: Number of inference steps for the base pipeline.
        seed: Seed for the torch generator; ``0`` means "do not fix the seed".
        prompt_2: Second positive prompt, forwarded to both pipelines.
        negative_prompt_2: Second negative prompt, forwarded to both pipelines.
        high_noise_frac: Value passed to the refiner as ``denoising_start``.

    Returns:
        The first image from the refiner's output.
    """
    # seed == 0 selects an unseeded run. The previous expression called
    # np.random.seed(0), which returns None (and `np` is not imported in this
    # file), so passing None directly is the same intent without the NameError.
    generator = None if seed == 0 else torch.manual_seed(int(seed))

    # Coerce defensively in case the UI delivers these numeric values as floats.
    height = int(height)
    width = int(width)
    steps = int(steps)

    # NOTE(review): the base pass runs without `denoising_end` while the
    # refiner starts at `denoising_start=high_noise_frac` -- confirm against
    # the diffusers SDXL base+refiner guidance whether the base should stop at
    # the same fraction. Left unchanged here because it alters the output.
    latents = pipe(
        prompt,
        prompt_2=prompt_2,
        negative_prompt_2=negative_prompt_2,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=steps,
        guidance_scale=scale,
        num_images_per_prompt=1,
        generator=generator,
        output_type="latent",  # hand latents, not a decoded image, to the refiner
    ).images

    refined = refiner(
        prompt=prompt,
        prompt_2=prompt_2,
        negative_prompt=negative_prompt,
        negative_prompt_2=negative_prompt_2,
        image=latents,
        denoising_start=high_noise_frac,
    ).images
    return refined[0]
# Build the web UI around `genie` and start serving it. The order of `inputs`
# must match genie's positional parameters: prompt, negative_prompt, height,
# width, scale, steps, seed, prompt_2, negative_prompt_2, high_noise_frac.
gr.Interface(
    fn=genie,
    inputs=[
        gr.Textbox(label='Positive Promt. 77 Token Limit.'),
        gr.Textbox(label='Negative Prompt.'),
        gr.Slider(minimum=512, maximum=1024, value=768, step=128, label='Height'),
        gr.Slider(minimum=512, maximum=1024, value=768, step=128, label='Width'),
        gr.Slider(minimum=1, maximum=15, value=10, label='Guidance Scale'),
        gr.Slider(minimum=25, maximum=50, value=25, step=1, label='Number of Iterations'),
        # Seed input: unlabeled, initial value randomized by the component.
        gr.Slider(minimum=1, step=1, maximum=999999999999999999, randomize=True),
        gr.Textbox(label='Embedded Prompt'),
        gr.Textbox(label='Embedded Negative Prompt'),
        gr.Slider(minimum=.7, maximum=.99, value=.95, step=.01, label='Refiner Denoise Start %'),
    ],
    outputs='image',
    title="Realistic Vision XL V1.0 Demo by SG161222",
    description="The model is still in the training phase. This is not the final version and may contain artifacts and perform poorly in some cases. Currently running on CPU",
    # A plain double-quoted literal cannot span physical lines, so the text is
    # assembled from adjacent literals with the line breaks written as "\n".
    article=(
        "Please use the prompt template below to get an example of the desired generation results:\n"
        "Positive prompt: dark shot, photo of cute 24 y.o blonde woman, perfect eyes, skin moles, short hair, looks at viewer, cinematic shot, hard shadows\n"
        "Negative prompt: (worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth\n"
        "Denoising strength: 0.25-0.5, CFG scale: 7, Seed: 4271781772\n"
        "WARNING: Be patient, as generation is Slow.\n"
        "65s/Iteration. Expected Generation Time is 25-50mins an image for 25-50 iterations respectively. This model is capable of producing mild NSFW images"
    ),
).launch(debug=True, max_threads=80)