"""Gradio text-to-image app using the SDXL base + refiner ensemble-of-experts pipeline.

The base model denoises the high-noise portion of the schedule and hands
latents to the refiner, which finishes the remaining steps.
"""

from diffusers import DiffusionPipeline
import torch
import gradio as gr

# How many total denoising steps, and what fraction runs on the base model
# (the refiner handles the remainder) — the 80/20 "ensemble of experts" split.
N_STEPS = 40
HIGH_NOISE_FRAC = 0.8

# Load the base pipeline. Runs in fp32 on CPU here; on a CUDA host pass
# torch_dtype=torch.float16, variant="fp16" and move to "cuda" (or use
# enable_model_cpu_offload) instead.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    use_safetensors=True,
    use_onnx=False,
)
# NOTE(review): mode="reduce-overhead" targets CUDA graphs; with the pipeline
# pinned to CPU below this mostly adds compile latency — confirm it is intended.
base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
print("Base Device", base.device)
base.to("cpu")

# Load the refiner, sharing the second text encoder and VAE with the base
# pipeline to avoid loading those weights twice.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    use_safetensors=True,
    use_onnx=False,
)
refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
print("Refiner Device", refiner.device)
refiner.to("cpu")


def getImage(prompt: str):
    """Generate one image for *prompt* via the two-stage SDXL pipeline.

    The base pipeline denoises the first HIGH_NOISE_FRAC of N_STEPS and
    returns latents (output_type="latent"); the refiner resumes from that
    point (denoising_start) and returns the finished PIL image.
    """
    latents = base(
        prompt=prompt,
        num_inference_steps=N_STEPS,
        denoising_end=HIGH_NOISE_FRAC,
        output_type="latent",
    ).images
    return refiner(
        prompt=prompt,
        num_inference_steps=N_STEPS,
        denoising_start=HIGH_NOISE_FRAC,
        image=latents,
    ).images[0]


demo = gr.Interface(fn=getImage, inputs="text", outputs="image")

# Guard the server launch so importing this module does not block on it.
if __name__ == "__main__":
    demo.launch()