"""Gradio demo: RealVisXL V1.0 base pipeline followed by the SDXL refiner.

Importing/running this module loads both diffusion pipelines (GPU in fp16
when CUDA is available, otherwise CPU) and launches a Gradio interface
whose single callback is :func:`genie`.
"""
import os

# The original script assigned this setting to an unused Python variable,
# which has no effect. It is exported here, before torch is imported, so it
# is in place before any CUDA allocation happens. ``setdefault`` keeps an
# externally supplied configuration untouched.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:6000")

import gradio as gr
import modin.pandas as pd
import torch
from diffusers import DiffusionPipeline

BASE_MODEL_ID = "SG161222/RealVisXL_V1.0"
REFINER_MODEL_ID = "stabilityai/stable-diffusion-xl-refiner-1.0"

device = "cuda" if torch.cuda.is_available() else "cpu"


def _compile_unet(pipeline):
    """Swap ``pipeline.unet`` for its ``torch.compile`` wrapper and return the pipeline."""
    pipeline.unet = torch.compile(pipeline.unet, mode="reduce-overhead", fullgraph=True)
    return pipeline


if device == "cuda":
    # Half-precision weights are only requested on the GPU path.
    fp16_kwargs = {
        "torch_dtype": torch.float16,
        "variant": "fp16",
        "use_safetensors": True,
    }

    torch.cuda.empty_cache()
    pipe = DiffusionPipeline.from_pretrained(BASE_MODEL_ID, **fp16_kwargs)
    pipe.enable_xformers_memory_efficient_attention()
    pipe = _compile_unet(pipe.to(device))

    torch.cuda.empty_cache()
    refiner = DiffusionPipeline.from_pretrained(REFINER_MODEL_ID, **fp16_kwargs)
    refiner.enable_xformers_memory_efficient_attention()
    # No ``.to(device)`` for the refiner: the script relies on sequential CPU
    # offload instead.
    # NOTE(review): combining offload hooks with ``fullgraph=True``
    # compilation may not be supported - confirm on a GPU host.
    refiner.enable_sequential_cpu_offload()
    refiner = _compile_unet(refiner)
else:
    pipe = DiffusionPipeline.from_pretrained(BASE_MODEL_ID, use_safetensors=True)
    pipe = _compile_unet(pipe.to(device))

    refiner = DiffusionPipeline.from_pretrained(REFINER_MODEL_ID, use_safetensors=True)
    refiner = _compile_unet(refiner.to(device))


def genie(prompt, negative_prompt, height, width, scale, steps, seed,
          prompt_2, negative_prompt_2, high_noise_frac):
    """Generate one image with the base pipeline and pass it through the refiner.

    Args:
        prompt: Positive prompt.
        negative_prompt: Negative prompt.
        height: Output height in pixels (cast to ``int``).
        width: Output width in pixels (cast to ``int``).
        scale: Guidance scale forwarded to the base pipeline.
        steps: Number of inference steps for the base pipeline (cast to ``int``).
        seed: RNG seed. ``0`` means "do not seed" (no generator is passed);
            any other value seeds torch's global RNG.
        prompt_2: Second positive prompt, forwarded as ``prompt_2``.
        negative_prompt_2: Second negative prompt, forwarded as
            ``negative_prompt_2``.
        high_noise_frac: Forwarded to the refiner as ``denoising_start``.

    Returns:
        The first element of the refiner output's ``images``.
    """
    seed = int(seed)
    # The original called ``np.random.seed(0)`` here, but ``np`` is never
    # imported (NameError) and that call returns None anyway, so an unseeded
    # run is expressed directly as ``generator=None``.
    generator = None if seed == 0 else torch.manual_seed(seed)

    # NOTE(review): the base pass is not given a ``denoising_end`` matching
    # the refiner's ``denoising_start``, and the refiner runs with its own
    # default step count - confirm this is intended.
    latents = pipe(
        prompt,
        prompt_2=prompt_2,
        negative_prompt_2=negative_prompt_2,
        negative_prompt=negative_prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(steps),
        guidance_scale=scale,
        num_images_per_prompt=1,
        generator=generator,
        output_type="latent",
    ).images

    return refiner(
        prompt=prompt,
        prompt_2=prompt_2,
        negative_prompt=negative_prompt,
        negative_prompt_2=negative_prompt_2,
        image=latents,
        denoising_start=high_noise_frac,
    ).images[0]


# Report the device actually in use instead of a hard-coded "CPU".
DESCRIPTION = (
    "The model is still in the training phase. This is not the final version "
    "and may contain artifacts and perform poorly in some cases. "
    "Currently running on " + ("GPU" if device == "cuda" else "CPU")
)

# Same text and line breaks as the original article, built with explicit
# newlines because a plain quoted literal cannot span physical lines.
ARTICLE = "\n".join([
    "Please use the prompt template below to get an example of the desired generation results:",
    "",
    "Positive prompt: dark shot, photo of cute 24 y.o blonde woman, perfect eyes, skin moles, short hair, looks at viewer, cinematic shot, hard shadows",
    "",
    "Negative prompt: (worst quality, low quality, illustration, 3d, 2d, painting, cartoons, sketch), open mouth",
    "Denoising strength: 0.25-0.5, CFG scale: 7, Seed: 4271781772",
    "",
    "WARNING: Be patient, as generation is Slow.",
    "65s/Iteration. Expected Generation Time is 25-50mins an image for 25-50 iterations respectively. This model is capable of producing mild NSFW images",
])

# Input components are listed in the same order as ``genie``'s parameters.
gr.Interface(
    fn=genie,
    inputs=[
        gr.Textbox(label='Positive Prompt. 77 Token Limit.'),
        gr.Textbox(label='Negative Prompt.'),
        gr.Slider(512, 1024, 768, step=128, label='Height'),
        gr.Slider(512, 1024, 768, step=128, label='Width'),
        gr.Slider(1, 15, 10, label='Guidance Scale'),
        gr.Slider(25, maximum=50, value=25, step=1, label='Number of Iterations'),
        gr.Slider(minimum=1, step=1, maximum=999999999999999999, randomize=True),
        gr.Textbox(label='Embedded Prompt'),
        gr.Textbox(label='Embedded Negative Prompt'),
        gr.Slider(minimum=.7, maximum=.99, value=.95, step=.01, label='Refiner Denoise Start %'),
    ],
    outputs='image',
    title="Realistic Vision XL V1.0 Demo by SG161222",
    description=DESCRIPTION,
    article=ARTICLE,
).launch(debug=True, max_threads=80)