Spaces:

wl-zhao
/

unipc_sdm

Runtime error

File size: 10,875 Bytes

from diffusers import AutoencoderKL, UNet2DConditionModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, UniPCMultistepScheduler
from diffusers import StableDiffusionXLPipeline
from diffusers import StableDiffusionXLImg2ImgPipeline
import gradio as gr
import torch
from PIL import Image
import os

# One UniPC multistep scheduler instance, handed to every pipeline that this
# file builds via ``scheduler=scheduler``.
scheduler = UniPCMultistepScheduler(
    # Noise schedule.
    # NOTE(review): presumably chosen to match the Stable Diffusion training
    # schedule -- confirm against the checkpoints' scheduler configs.
    num_train_timesteps=1000,
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    trained_betas=None,
    # Solver options; see the diffusers UniPCMultistepScheduler reference for
    # the exact meaning of each flag.
    solver_type="bh2",
    predict_x0=True,
    thresholding=False,
    lower_order_final=True,
    disable_corrector=[0],
)

class Model:
    """Record describing one selectable checkpoint and its pipelines.

    Args:
        name: Display name; also used to look the model up by name.
        path: Identifier passed to ``from_pretrained`` when loading.
        prefix: Text that the inference helpers prepend to the user prompt.
    """

    def __init__(self, name: str, path: str, prefix: str) -> None:
        self.name, self.path, self.prefix = name, path, prefix
        # Text-to-image and image-to-image pipelines; both stay ``None`` until
        # the start-up loading code assigns them.
        self.pipe_t2i = self.pipe_i2i = None

# Checkpoints offered by the demo; the first entry is the initial selection.
models = [
    Model(
        "Stable-Diffusion-XL",
        "stabilityai/stable-diffusion-xl-base-1.0",
        "The XL version of official stable-diffusion",
    ),
    Model(
        "Stable-Diffusion-v1.4",
        "CompVis/stable-diffusion-v1-4",
        "The 1.4 version of official stable-diffusion",
    ),
    Model(
        "Waifu",
        "hakurei/waifu-diffusion",
        "anime style",
    ),
]

# Mutable module state that the inference helpers read and update.
last_mode = "txt2img"
current_model = models[0]
current_model_path = models[0].path

# Hub access token taken from the environment; None when the variable is unset.
auth_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")

print(f"Is CUDA available: {torch.cuda.is_available()}")

def _load_pipelines(model, dtype_kwargs):
    """Build and attach the txt2img and img2img pipelines for ``model``.

    The UNet and VAE are loaded once from the model's own repository and
    passed to both pipelines so the two share those weights.

    Args:
        model: ``Model`` entry whose ``pipe_t2i`` / ``pipe_i2i`` get assigned.
        dtype_kwargs: Extra ``from_pretrained`` keyword arguments, e.g.
            ``{"torch_dtype": torch.float16}`` on GPU or ``{}`` on CPU.

    Raises:
        Exception: whatever ``from_pretrained`` raises (missing files,
            authentication failures, out-of-memory, ...).
    """
    # SDXL checkpoints need their dedicated pipeline classes on CPU as well as
    # on GPU; every other checkpoint uses the classic Stable Diffusion ones.
    if "XL" in model.name:
        t2i_class = StableDiffusionXLPipeline
        i2i_class = StableDiffusionXLImg2ImgPipeline
    else:
        t2i_class = StableDiffusionPipeline
        i2i_class = StableDiffusionImg2ImgPipeline

    # Load the VAE from the same repository as the UNet instead of reusing the
    # first model's VAE for every checkpoint.
    unet = UNet2DConditionModel.from_pretrained(
        model.path, subfolder="unet", use_auth_token=auth_token, **dtype_kwargs
    )
    vae = AutoencoderKL.from_pretrained(
        model.path, subfolder="vae", use_auth_token=auth_token, **dtype_kwargs
    )

    shared_kwargs = dict(
        unet=unet,
        vae=vae,
        scheduler=scheduler,
        use_auth_token=auth_token,
        **dtype_kwargs,
    )
    model.pipe_t2i = t2i_class.from_pretrained(model.path, **shared_kwargs)
    model.pipe_i2i = i2i_class.from_pretrained(model.path, **shared_kwargs)


use_cuda = torch.cuda.is_available()
# Half precision is only requested when a GPU is available.
dtype_kwargs = {"torch_dtype": torch.float16} if use_cuda else {}

# Collect the models that loaded successfully instead of removing entries from
# ``models`` while iterating over it, which would skip the following entry.
loaded_models = []
for model in models:
    try:
        _load_pipelines(model, dtype_kwargs)
    except Exception as exc:  # best effort: drop only the model that failed
        print(f"Skipping model {model.name!r}: failed to load ({exc!r})")
    else:
        loaded_models.append(model)

# Update the list in place so every existing reference sees the filtered set.
models[:] = loaded_models
if not models:
    raise RuntimeError("None of the configured models could be loaded")

# Keep the selection state consistent with the pipeline that is made active,
# even when the originally selected model had to be dropped.
current_model = models[0]
current_model_path = current_model.path
pipe = current_model.pipe_t2i.to("cuda" if use_cuda else "cpu")

device = "GPU 🔥" if use_cuda else "CPU 🥶"

def inference(model_name, prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Entry point wired to the UI: pick the model and run the right pipeline.

    Args:
        model_name: Name of the ``Model`` entry to use. If no entry matches,
            the currently selected model is kept.
        prompt: User prompt text.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width: Requested output width (txt2img) or bounding width (img2img).
        height: Requested output height (txt2img) or bounding height (img2img).
        seed: Seed for the random generator; ``0`` means no fixed seed.
        img: Optional input image. When given, image-to-image is used.
        strength: Transformation strength for image-to-image.
        neg_prompt: Negative prompt text.

    Returns:
        The first generated image, as returned by ``txt_to_img`` /
        ``img_to_img``.
    """
    global current_model

    # Fall back to the current selection when the name is unknown; previously
    # that case left ``model_path`` unbound and raised UnboundLocalError.
    selected = next((m for m in models if m.name == model_name), None)
    if selected is not None:
        current_model = selected
    model_path = current_model.path

    # The UI may deliver the seed as a float; ``manual_seed`` needs an int.
    seed = int(seed or 0)
    generator = None
    if seed != 0:
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(device_name).manual_seed(seed)

    if img is not None:
        return img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
    return txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator)

def txt_to_img(model_path, prompt, neg_prompt, guidance, steps, width, height, generator=None):
    """Generate an image from text with the current model's txt2img pipeline.

    Swaps the active global ``pipe`` first when either the requested model
    path or the last used mode differs from what is currently active.

    Returns:
        The first image of the pipeline result, after NSFW replacement.
    """
    global last_mode
    global pipe
    global current_model_path

    same_model = model_path == current_model_path
    same_mode = last_mode == "txt2img"
    if not (same_model and same_mode):
        current_model_path = model_path

        # Move the outgoing pipeline to the CPU before activating the new one.
        pipe.to("cpu")
        pipe = current_model.pipe_t2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")

        last_mode = "txt2img"

    # NOTE(review): the prefix is concatenated with no separator -- confirm
    # that this is the intended prompt format.
    full_prompt = current_model.prefix + prompt
    call_kwargs = dict(
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    )
    output = pipe(full_prompt, **call_kwargs)

    return replace_nsfw_images(output)

def img_to_img(model_path, prompt, neg_prompt, img, strength, guidance, steps, width, height, generator=None):
    """Transform ``img`` with the current model's img2img pipeline.

    Swaps the active global ``pipe`` first when either the requested model
    path or the last used mode differs from what is currently active. The
    input image is scaled, keeping its aspect ratio, so that it fits inside
    ``width`` x ``height``.

    Returns:
        The first image of the pipeline result, after NSFW replacement.
    """
    global last_mode
    global pipe
    global current_model_path

    same_model = model_path == current_model_path
    same_mode = last_mode == "img2img"
    if not (same_model and same_mode):
        current_model_path = model_path

        # Move the outgoing pipeline to the CPU before activating the new one.
        pipe.to("cpu")
        pipe = current_model.pipe_i2i
        if torch.cuda.is_available():
            pipe = pipe.to("cuda")

        last_mode = "img2img"

    # NOTE(review): the prefix is concatenated with no separator -- confirm
    # that this is the intended prompt format.
    full_prompt = current_model.prefix + prompt

    # Use the smaller scale factor so both dimensions stay within bounds.
    scale = min(height / img.height, width / img.width)
    target_size = (int(img.width * scale), int(img.height * scale))
    resized = img.resize(target_size, Image.LANCZOS)

    call_kwargs = dict(
        negative_prompt=neg_prompt,
        image=resized,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        generator=generator,
    )
    output = pipe(full_prompt, **call_kwargs)

    return replace_nsfw_images(output)

def replace_nsfw_images(results):
    """Swap flagged images for the ``nsfw.png`` placeholder; return the first.

    Args:
        results: Pipeline output exposing ``images`` and, optionally, a
            ``nsfw_content_detected`` sequence of per-image flags. A missing
            attribute, ``None`` or any non-list/tuple value is treated as
            "nothing flagged".

    Returns:
        ``results.images[0]`` after any replacement.

    Raises:
        IndexError: if ``results.images`` is empty.
    """
    flags = getattr(results, "nsfw_content_detected", None)
    if not isinstance(flags, (list, tuple)):
        flags = ()

    # zip() stops at the shorter sequence, so a flag list that is shorter than
    # the image list cannot cause an out-of-range access.
    for index, flagged in zip(range(len(results.images)), flags):
        if not flagged:
            continue
        try:
            results.images[index] = Image.open("nsfw.png")
        except OSError as exc:
            # Best effort: without the placeholder the pipeline's image is
            # kept as is, and later attempts would fail the same way.
            print(f"Could not load NSFW placeholder image: {exc}")
            break

    return results.images[0]

# Stylesheet text handed to ``gr.Blocks(css=css)`` when the UI is built.
# NOTE(review): the literal is wrapped in <style> tags -- confirm that the
# ``css`` argument accepts markup rather than bare CSS rules.
css = """
  <style>
  .finetuned-diffusion-div {
      text-align: center;
      max-width: 700px;
      margin: 0 auto;
      font-family: 'IBM Plex Sans', sans-serif;
    }
    .finetuned-diffusion-div div {
      display: inline-flex;
      align-items: center;
      gap: 0.8rem;
      font-size: 1.75rem;
    }
    .finetuned-diffusion-div div h1 {
      font-weight: 900;
      margin-top: 15px;
      margin-bottom: 15px;
      text-align: center;
      line-height: 150%;
    }
    .finetuned-diffusion-div p {
      margin-bottom: 10px;
      font-size: 94%;
    }
    .finetuned-diffusion-div p a {
      text-decoration: underline;
    }
    .tabs {
      margin-top: 0px;
      margin-bottom: 0px;
    }
    #gallery {
      min-height: 20rem;
    }
    .container {
      max-width: 1000px;
      margin: auto;
      padding-top: 1.5rem;
    }
  </style>
"""
# Build the Gradio page: an HTML header, a left column with the model picker,
# prompt box and output image, and a right column with the generation options
# and the optional image-to-image input.
with gr.Blocks(css=css) as demo:
    # Header; the f-string interpolates the module-level ``device`` label.
    gr.HTML(
        f"""
            <div class="finetuned-diffusion-div">
              <div>
                <h1>Stable-Diffusion with UniPC</h1>
              </div>
              <br>
              <p>
              ❤️ Acknowledgement: Hardware resources of this demo are supported by HuggingFace 🤗 . Many thanks for the help!
              </p>
              <br>
              <p>
               This is a demo of sampling by UniPC with two variants of Stable Diffusion models, including <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4">Stable-Diffusion-v1.4</a> and <a href="https://huggingface.co/hakurei/waifu-diffusion">Waifu</a>.
              </p>
              <br>
              <p>
               <a href="https://github.com/wl-zhao/UniPC">UniPC</a> is a training-free framework designed for the fast sampling of diffusion models, which consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders.
              </p>
              <p>
              We use <a href="https://github.com/huggingface/diffusers">Diffusers</a>  🧨  to implement this demo, which currently supports the multistep UniPC scheduler. For more details of UniPC with Diffusers, check <a href="https://github.com/huggingface/diffusers/pull/2373">this pull request</a>.
              </p>
              <br>
              <br>
              <p>
               Running on <b>{device}</b>
              </p>
            </div>
        """
    )

    with gr.Row():
        
        # Left column: model selection, prompt entry and the generated image.
        with gr.Column(scale=55):
          with gr.Group():
              # Choices are the names of the entries in ``models``.
              model_name = gr.Dropdown(label="Model", choices=[m.name for m in models], value=current_model.name)
              with gr.Row():
                prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder="Enter prompt. Style applied automatically").style(container=False)
                generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))


              image_out = gr.Image(height=512)
              # gallery = gr.Gallery(
              #     label="Generated images", show_label=False, elem_id="gallery"
              # ).style(grid=[1], height="auto")

        # Right column: sampling options and the image-to-image input.
        with gr.Column(scale=45):
          with gr.Tab("Options"):
            with gr.Group():
              neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")

              # n_images = gr.Slider(label="Images", value=1, minimum=1, maximum=4, step=1)

              with gr.Row():
                guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
                steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=100, step=1)

              with gr.Row():
                width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
                height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)

              # A seed of 0 makes ``inference`` run without a fixed generator.
              seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)

          with gr.Tab("Image to image"):
              with gr.Group():
                # When an image is supplied, ``inference`` takes the img2img path.
                image = gr.Image(label="Image", height=256, tool="editor", type="pil")
                strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

    # model_name.change(lambda x: gr.update(visible = x == models[0].name), inputs=model_name, outputs=custom_model_group)

    # The order of this list must match the positional parameters of ``inference``.
    inputs = [model_name, prompt, guidance, steps, width, height, seed, image, strength, neg_prompt]
    # Submitting the prompt box and clicking the button run the same handler.
    prompt.submit(inference, inputs=inputs, outputs=image_out)

    generate.click(inference, inputs=inputs, outputs=image_out)


    # Footer with credits.
    gr.Markdown('''
      Stable-diffusion Models by [CompVis](https://huggingface.co/CompVis) and [stabilityai](https://huggingface.co/stabilityai), Waifu-diffusion models by [@hakurei](https://huggingface.co/hakurei). Most of the code of this demo are copied from [@anzorq's fintuned-diffusion](https://huggingface.co/spaces/anzorq/finetuned_diffusion/tree/main) ❤️<br>
      Space by [Wenliang Zhao](https://github.com/wl-zhao). 
        
      ![visitors](https://visitor-badge.glitch.me/badge?page_id=wl-zhao.unipc_sdm)
    ''')

# The handlers mutate module-level state (``pipe``, ``current_model``,
# ``last_mode``), so requests must not overlap.
# NOTE(review): presumably ``concurrency_count=1`` makes the queue process one
# request at a time -- confirm against the installed Gradio version.
demo.queue(concurrency_count=1)
# Start the web server without debug mode and without a public share link.
demo.launch(debug=False, share=False)