"""Gradio demo: style-aligned image generation with Stable Diffusion XL.

A reference image is DDIM-inverted under a source prompt, then a second
image is sampled for the user's prompt while a ``sa_handler.Handler`` is
registered on the pipeline so both images are generated in one batch.
"""

import io
import os
import sys
from typing import Optional

import gradio as gr
import numpy as np
import torch
from diffusers import DDIMScheduler, StableDiffusionXLPipeline
from PIL import Image

import inversion
import sa_handler

# Model load. The pipeline is created once at import time and shared by
# every request handled by ``process_image``.
scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
)
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    scheduler=scheduler,
).to("cuda")


def process_image(
    image: Optional[np.ndarray],
    prompt: str,
    style: str,
    src_description: str,
    inference_steps: float,
    shared_score_shift: float,
    shared_score_scale: float,
    guidance_scale: float,
) -> Image.Image:
    """Generate an image for ``prompt`` aligned with the reference image.

    Args:
        image: Reference image as a numpy array (what ``gr.Image(type="numpy")``
            delivers), or ``None`` when nothing was uploaded.
        prompt: Text describing the image to generate.
        style: Style phrase appended to both the source and target prompts.
        src_description: Text describing the reference image.
        inference_steps: Number of steps used for inversion and sampling.
        shared_score_shift: Forwarded to ``sa_handler.StyleAlignedArgs``.
        shared_score_scale: Forwarded to ``sa_handler.StyleAlignedArgs``.
        guidance_scale: Guidance scale passed to the pipeline call.

    Returns:
        The second image of the generated batch, i.e. the one produced for
        the target prompt (index 0 corresponds to the source prompt).

    Raises:
        gr.Error: If no reference image was provided.
    """
    if image is None:
        # Without this guard the request dies inside Image.fromarray with an
        # opaque AttributeError; surface a readable message in the UI instead.
        raise gr.Error("Please upload a reference image first.")

    # Slider values may arrive as floats; the step count must be an integer.
    num_inference_steps = int(inference_steps)

    src_prompt = f'{src_description}, {style}.'
    prompts = [src_prompt, f"{prompt}, {style}."]

    # Invert the reference image at 1024x1024 under the source prompt.
    x0 = np.array(Image.fromarray(image).resize((1024, 1024)))
    # NOTE(review): the trailing 2 is presumably the guidance scale used
    # during inversion -- confirm against inversion.ddim_inversion.
    zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)

    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True,
        share_layer_norm=True,
        share_attention=True,
        adain_queries=True,
        adain_keys=True,
        adain_values=False,
        shared_score_shift=shared_score_shift,
        shared_score_scale=shared_score_scale,
    )
    handler.register(sa_args)
    try:
        zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)

        # Fixed CPU seed keeps the initial noise for the target prompt
        # reproducible across requests.
        g_cpu = torch.Generator(device='cpu')
        g_cpu.manual_seed(10)
        latents = torch.randn(
            len(prompts), 4, 128, 128,
            device='cpu',
            generator=g_cpu,
            dtype=pipeline.unet.dtype,
        ).to('cuda:0')
        # Slot 0 (the source prompt) starts from the inverted latent rather
        # than from random noise.
        latents[0] = zT

        images_a = pipeline(
            prompts,
            latents=latents,
            callback_on_step_end=inversion_callback,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        ).images
    finally:
        # The pipeline is a shared module-level object: always undo the
        # handler registration, even when sampling raises, so a failed
        # request cannot leave it registered for later requests.
        handler.remove()

    # With the pipeline's default output_type ("pil") the batch already holds
    # PIL images, so no array round-trip is needed before returning.
    return images_a[1]


# Initial values are passed via ``value=``; ``default=`` is the Gradio 2.x
# spelling and is not accepted by the top-level ``gr.*`` components.
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="numpy"),
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Enter your style", value="medieval painting"),
        gr.Textbox(label="Enter source description", value="Man laying in a bed"),
        gr.Slider(minimum=5, maximum=50, step=1, value=50, label="Number of Inference Steps"),
        gr.Slider(minimum=1, maximum=2, step=0.01, value=1.5, label="Shared Score Shift"),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label="Shared Score Scale"),
        gr.Slider(minimum=5, maximum=120, step=1, value=10, label="Guidance Scale"),
    ],
    outputs="image",
    title="Stable Diffusion XL with Style Alignment",
    description="Generate images in the style of your choice.",
)

iface.launch()