# Hugging Face Space: gstranger/diffusion_models (status: Sleeping)
# File size: 10,529 Bytes

import os
import random

import cv2
import gradio as gr
import numpy as np
import torch
# import spaces #[uncomment to use ZeroGPU]
from diffusers import (ControlNetModel, StableDiffusionControlNetPipeline,
                       StableDiffusionPipeline)
from peft import PeftModel
from PIL import Image

# Target device string: "cuda" when a CUDA device is available, else "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Base checkpoints offered in the "Model" dropdown; the first entry is the
# dropdown's default value.
# NOTE(review): infer() loads every entry through the Stable Diffusion (non-XL)
# pipeline classes - confirm that each repo listed here is compatible.
models = [
    # "gstranger/kawaiicat-lora-1.4",
    "CompVis/stable-diffusion-v1-4",
    "stabilityai/sdxl-turbo",
    "sd-legacy/stable-diffusion-v1-5",
]

# Choices offered in the "Control Mode" dropdown.
controlnet_modes = ["canny", "Line Art"]

# Same repos as `models` in a different order; not referenced in the visible
# part of this file.
model_dropdown = [
    "stabilityai/sdxl-turbo",
    "CompVis/stable-diffusion-v1-4",
    "sd-legacy/stable-diffusion-v1-5",
]

def process_control_image(image, mode="canny"):
    """Prepare a conditioning image for the given control mode.

    Args:
        image: Input image. For ``mode == "canny"`` it is converted with
            ``np.array`` and treated as RGB (``cv2.COLOR_RGB2GRAY``).
        mode: ``"canny"`` runs edge detection; any other value returns
            ``image`` unchanged.

    Returns:
        A PIL image holding the Canny edge map for ``"canny"``, otherwise the
        original ``image`` object.
    """
    if mode != "canny":
        return image

    rgb_pixels = np.array(image)
    grayscale = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)
    # Blur with a 5x5 Gaussian kernel before detecting edges.
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edges = cv2.Canny(smoothed, 50, 150)
    return Image.fromarray(edges)

# Half precision when CUDA is available, full precision otherwise.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Largest value of a signed 32-bit integer; upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max
# Upper bound of the width/height sliders.
MAX_IMAGE_SIZE = 1024
# Not referenced in the visible part of this file.
MODEL_NAME = "CompVis/stable-diffusion-v1-4"
# Directory whose "unet" and "text_encoder" sub-folders hold the LoRA adapter.
CKPT_DIR = "sd-14-lora-1000"



# @spaces.GPU #[uncomment to use ZeroGPU]
def infer(
    model_id,
    prompt,
    negative_prompt,
    randomize_seed=False,
    width=512,
    height=512,
    seed=488,
    guidance_scale=7,
    num_inference_steps=50,

    lora_enable=True,
    lora_scale=0.8,

    controlnet_enable=False,
    control_mode="Line Art",
    control_strength=0.8,
    control_image=None,

    ip_adapter_enable=False,
    ip_adapter_scale=0.8,
    ip_image=None,

    torch_dtype=torch_dtype,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image with a Stable Diffusion pipeline and optional add-ons.

    A pipeline is loaded from ``model_id`` on every call. Depending on the
    flags, a ControlNet, the LoRA adapter stored under ``CKPT_DIR`` and an
    IP-Adapter are attached before sampling.

    NOTE(review): the ControlNet and IP-Adapter weights used here are the
    "sd15" variants; compatibility with ``model_id`` is not checked.

    Args:
        model_id: Model identifier passed to ``from_pretrained``.
        prompt: Text prompt.
        negative_prompt: Negative text prompt.
        randomize_seed: When true, ``seed`` is replaced by a random value in
            ``[0, MAX_SEED]``.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        seed: Seed for the random generator (used when ``randomize_seed`` is
            false).
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of sampling steps passed to the pipeline.
        lora_enable: Wrap the UNet and text encoder with the LoRA adapter.
        lora_scale: Passed as ``cross_attention_kwargs={"scale": ...}``.
        controlnet_enable: Use a ControlNet pipeline; only takes effect when
            ``control_image`` is provided.
        control_mode: ``"canny"`` selects the canny ControlNet; any other
            value selects the line-art ControlNet.
        control_strength: ControlNet conditioning scale.
        control_image: Conditioning image for the ControlNet.
        ip_adapter_enable: Load the IP-Adapter; only takes effect when
            ``ip_image`` is provided.
        ip_adapter_scale: IP-Adapter scale.
        ip_image: Reference image for the IP-Adapter.
        torch_dtype: dtype used when loading the model weights.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        tuple: ``(image, seed)`` - the first generated image and the seed that
        was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        # Honour the caller-supplied seed; manual_seed() requires an int.
        seed = int(seed)

    generator = torch.Generator().manual_seed(seed)

    params = {
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'guidance_scale': guidance_scale,
        'num_inference_steps': num_inference_steps,
        'width': width,
        'height': height,
        'generator': generator,
    }

    print("in infer 1")
    print("controlnet_enable", controlnet_enable)

    if controlnet_enable and control_image is None:
        # Without a conditioning image the plain pipeline is used instead.
        gr.Warning("ControlNet is enabled but no control image was provided; ControlNet is skipped.")

    if controlnet_enable and control_image is not None:
        print("in controlnet_enable")
        if control_mode == "canny":
            controlnet_repo = "lllyasviel/control_v11p_sd15_canny"
        else:
            # Every non-canny mode falls back to the line-art weights.
            controlnet_repo = "lllyasviel/control_v11p_sd15_lineart"

        controlnet = ControlNetModel.from_pretrained(
            controlnet_repo,
            torch_dtype=torch_dtype,
            cache_dir="./models_cache",
        )
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            model_id,
            controlnet=controlnet,
            torch_dtype=torch_dtype,
            safety_checker=None,
        )

        params['image'] = process_control_image(control_image, control_mode)
        params['controlnet_conditioning_scale'] = float(control_strength)
    else:
        print("step: basic pipeline")
        pipe = StableDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch_dtype,
            safety_checker=None,
        )
        print("step: basic pipeline done!")

    if lora_enable:
        print("step: lora")
        unet_sub_dir = os.path.join(CKPT_DIR, "unet")
        text_encoder_sub_dir = os.path.join(CKPT_DIR, "text_encoder")
        adapter_name = "sd-14-lora"

        pipe.unet = PeftModel.from_pretrained(pipe.unet, unet_sub_dir, adapter_name=adapter_name)
        pipe.text_encoder = PeftModel.from_pretrained(
            pipe.text_encoder, text_encoder_sub_dir, adapter_name=adapter_name
        )
        params['cross_attention_kwargs'] = {"scale": lora_scale}
        print("step: lora done!")

    if torch_dtype in (torch.float16, torch.bfloat16):
        # Cast to the requested reduced-precision dtype (not always float16)
        # so the wrapped modules match the rest of the pipeline.
        pipe.unet.to(dtype=torch_dtype)
        pipe.text_encoder.to(dtype=torch_dtype)

    if ip_adapter_enable:
        if ip_image is None:
            # Loading the adapter without a reference image would hand
            # ip_adapter_image=None to the pipeline, so skip it instead.
            gr.Warning("IP-Adapter is enabled but no reference image was provided; IP-Adapter is skipped.")
        else:
            print("step: ip_adapter_enable")
            pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
            pipe.set_ip_adapter_scale(ip_adapter_scale)
            params['ip_adapter_image'] = process_control_image(ip_image, "")
            print("step: ip_adapter_enable done!")

    # Move the fully assembled pipeline to the selected device; when `device`
    # is "cpu" the modules are already there.
    pipe = pipe.to(device)

    print("step: start generating")
    print(params)
    image = pipe(**params).images[0]
    print("step: generating done!")
    return image, seed


# Example prompts shown below the generator (wired to the prompt textbox).
examples = [
    "kawaiicat. The cat is sitting. The cat's tail is curled up at the end. The cat is pleased and is enjoying its time.",
    "kawaiicat. The cat is sitting upright. The cat is eating some noodles with the chopsticks from a green bowl, which it's holding in his hands.",
]

# Custom CSS handed to gr.Blocks: centres the main column and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 640px;\n"
    "}\n"
)

# UI definition. The order of `inputs` in gr.on() below must match the
# positional parameters of infer().
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image kawaiicat Stickers")
        with gr.Row():
            # Dropdown to select the model from Hugging Face
            model_id = gr.Dropdown(
                label="Model",
                choices=models,
                value=models[0],  # Default model
            )

        lora_scale = gr.Slider(
            label="LORA Scale",
            minimum=0,
            maximum=1,
            step=0.01,
            value=0.8,
        )

        lora_enable = gr.Checkbox(label="Use LORA", value=True)

        with gr.Column():
            controlnet_enable = gr.Checkbox(label="Enable ControlNet", value=False)
            # Hidden until the checkbox above is ticked (see .change() below).
            with gr.Accordion("ControlNet Settings", visible=False) as controlnet_accordion:
                control_mode = gr.Dropdown(controlnet_modes, label="Control Mode", value="canny")
                control_strength = gr.Slider(0.0, 2.0, value=1.0, step=0.1, label="Control Strength")
                control_image = gr.Image(label="Control Image", type="pil")

            ip_adapter_enable = gr.Checkbox(label="Enable IP-Adapter", value=False)
            # Hidden until the checkbox above is ticked (see .change() below).
            with gr.Accordion("IP-Adapter Settings", visible=False) as ipadapter_accordion:
                ip_adapter_scale = gr.Slider(0, 1, value=0.5, label="IP-Adapter Scale")
                ip_image = gr.Image(label="Reference Image", type="pil")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
                value="kawaiicat. The cat is having fun, is smiling."
            )

            # A second "Negative prompt" textbox used to live in this row. Its
            # variable was rebound by the one in "Advanced Settings", so it was
            # rendered but never passed to infer(); it has been removed.
            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=True,
                value="bad anatomy, crop image, bad face of the cat"
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=488,  # same as the `seed` default of infer()
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=10.0,  # Replace with defaults that work for your model
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=50,  # Replace with defaults that work for your model
                )

        gr.Examples(examples=examples, inputs=[prompt])

        # Show or hide each settings accordion together with its checkbox.
        controlnet_enable.change(
            lambda x: gr.update(visible=x),
            controlnet_enable,
            controlnet_accordion
        )
        ip_adapter_enable.change(
            lambda x: gr.update(visible=x),
            ip_adapter_enable,
            ipadapter_accordion
        )

    # Run inference on button click or when the prompt is submitted; the seed
    # returned by infer() is written back to the seed slider.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            model_id,
            prompt,
            negative_prompt,
            randomize_seed,
            width,
            height,
            seed,
            guidance_scale,
            num_inference_steps,
            lora_enable, lora_scale,
            controlnet_enable, control_mode, control_strength, control_image,
            ip_adapter_enable, ip_adapter_scale, ip_image

        ],
        outputs=[result, seed],
    )


# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()