# --- File-viewer header captured together with the source (not code) ---
# Tonic's picture
# Update app.py
# 53ad954
# raw
# history blame
# 2.82 kB
import gradio as gr
from diffusers import StableDiffusionXLPipeline, DDIMScheduler
import torch
import numpy as np
from PIL import Image
import io
import sys
import os
import sa_handler
import inversion
# Model Load
# DDIM scheduler with an explicit beta schedule; it is handed to the pipeline
# below in place of the checkpoint's bundled scheduler.
scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
)

# Load the SDXL base checkpoint in half precision (fp16 weight variant,
# safetensors format), then move the whole pipeline onto the CUDA device.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    scheduler=scheduler,
    use_safetensors=True,
    variant="fp16",
    torch_dtype=torch.float16,
)
pipeline = pipeline.to("cuda")
# Function to process the image
def process_image(image, prompt, style, src_description, inference_steps, shared_score_shift, shared_score_scale, guidance_scale):
    """Generate an image for ``prompt`` styled after a reference image.

    The reference image is resized to 1024x1024 and inverted with
    ``inversion.ddim_inversion`` under the source prompt
    ``"{src_description}, {style}."``.  The pipeline is then run on two
    prompts in one batch -- the source prompt and ``"{prompt}, {style}."`` --
    while the ``sa_handler`` hooks are registered, and the image produced for
    the second prompt is returned.

    Args:
        image: Reference image as a NumPy array that ``PIL.Image.fromarray``
            accepts.
        prompt: Text describing the content to generate.
        style: Style phrase appended to both the source and target prompts.
        src_description: Text describing the content of the reference image.
        inference_steps: Number of steps used for both the inversion and the
            generation; coerced to ``int``.
        shared_score_shift: Forwarded to ``sa_handler.StyleAlignedArgs``.
        shared_score_scale: Forwarded to ``sa_handler.StyleAlignedArgs``.
        guidance_scale: Forwarded to the pipeline call as ``guidance_scale``.

    Returns:
        The image the pipeline produced for the target prompt (a
        ``PIL.Image.Image`` under the pipeline's default output type).

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        # Fail with a clear message instead of an opaque error from PIL.
        raise ValueError("process_image requires an input image, got None.")

    # UI sliders may deliver the step count as a float; it must be an integer.
    num_inference_steps = int(inference_steps)

    src_prompt = f'{src_description}, {style}.'
    prompts = [
        src_prompt,
        f"{prompt}, {style}.",
    ]

    # Normalise to 3-channel RGB before resizing (a plain copy for RGB input);
    # presumably the inversion step expects an RGB array -- TODO confirm.
    x0 = np.array(Image.fromarray(image).convert("RGB").resize((1024, 1024)))

    # NOTE(review): the trailing positional ``2`` looks like the guidance scale
    # used during inversion -- confirm against inversion.ddim_inversion.
    zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)

    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True, share_layer_norm=True, share_attention=True,
        adain_queries=True, adain_keys=True, adain_values=False,
        shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,
    )
    handler.register(sa_args)
    try:
        zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)

        # Fixed seed so repeated calls with the same inputs start from the
        # same noise.
        g_cpu = torch.Generator(device='cpu')
        g_cpu.manual_seed(10)

        # One latent per prompt; 128x128 presumably corresponds to the
        # 1024x1024 input under an 8x VAE downsampling -- TODO confirm.
        latents = torch.randn(
            len(prompts), 4, 128, 128,
            device='cpu', generator=g_cpu, dtype=pipeline.unet.dtype,
        ).to('cuda:0')
        # The first batch entry (source prompt) starts from the inverted latent.
        latents[0] = zT

        images_a = pipeline(
            prompts,
            latents=latents,
            callback_on_step_end=inversion_callback,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        ).images
    finally:
        # The pipeline is a shared module-level object: always unregister the
        # hooks, even on failure, so they do not leak into later calls.
        handler.remove()

    # Index 1 is the target prompt. The pipeline's default output is already a
    # PIL image, so no ``Image.fromarray`` round-trip is needed.
    return images_a[1]
# Gradio front end for process_image. The input components are listed in the
# same order as process_image's parameters. Initial values are passed with
# ``value=``, the keyword gr.Textbox / gr.Slider use for their starting value
# (``default=`` is not a parameter of these components).
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="numpy"),
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Enter your style", value="medieval painting"),
        gr.Textbox(label="Enter source description", value="Man laying in a bed"),
        gr.Slider(minimum=5, maximum=50, step=1, value=50, label="Number of Inference Steps"),
        gr.Slider(minimum=1, maximum=2, step=0.01, value=1.5, label="Shared Score Shift"),
        gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label="Shared Score Scale"),
        gr.Slider(minimum=5, maximum=120, step=1, value=10, label="Guidance Scale"),
    ],
    outputs="image",
    title="Stable Diffusion XL with Style Alignment",
    description="Generate images in the style of your choice.",
)
iface.launch()