kolcontrl

Running on Zero

File size: 7,974 Bytes

d5f497d
 
 
6c91ee7
 
 
d5f497d
6c91ee7
 
 
d5f497d
 
6c91ee7
 
 
 
 
 
 
 
d5f497d
 
 
6c91ee7
 
d5f497d
 
 
 
 
6c91ee7
 
 
d5f497d
6c91ee7
d5f497d
6c91ee7
 
 
 
 
d5f497d
6c91ee7
d5f497d
6c91ee7
d5f497d
6c91ee7
d5f497d
 
6c91ee7
d5f497d
 
6c91ee7
d5f497d
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f497d
 
8004741
d5f497d
 
d4fa96f
6c91ee7
 
d4fa96f
 
6c91ee7
 
 
 
 
 
 
d5f497d
 
 
6c91ee7
 
 
 
 
 
 
d5f497d
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f497d
cd4f227
d5f497d
 
 
 
 
d890da3
d5f497d
 
 
 
 
 
 
f92dc60
 
 
 
 
 
 
d5f497d
 
 
 
 
 
d890da3
d5f497d
 
6c91ee7
 
 
 
 
 
 
d5f497d
 
 
 
 
6c91ee7
d5f497d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c91ee7
d5f497d
 
 
 
 
 
6c91ee7
d5f497d
 
6c91ee7
 
d5f497d
 
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f497d
d890da3
cbcd3a1
d5f497d
 
6c91ee7
d5f497d
 
 
d4fa96f
d5f497d
6c91ee7
d4fa96f
d5f497d
 
 
 
6c91ee7
d5f497d
 
 
8004741

import spaces
import random
import torch
import cv2
import gradio as gr
import numpy as np
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
from diffusers.utils import load_image
from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.controlnet import ControlNetModel
from diffusers import  AutoencoderKL
from kolors.models.unet_2d_condition import UNet2DConditionModel
from diffusers import EulerDiscreteScheduler
from PIL import Image
from annotator.midas import MidasDetector
from annotator.util import resize_image, HWC3


# Device string that every model below is moved to.
device = "cuda"
# Download (or reuse cached copies of) the base Kolors repo and the two
# ControlNet repos; the returned values are used below as local directories.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")

# Components shared by both pipelines. Models are cast with .half() and moved
# to `device`; the tokenizer and scheduler are loaded without any device move.
# NOTE(review): text_encoder is already requested with torch_dtype=torch.float16,
# so the extra .half() looks redundant — confirm before removing.
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
# One ControlNet per conditioning type, each loaded from its own repo snapshot.
controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)

# Keyword arguments common to both pipelines; only the ControlNet differs.
_shared_pipeline_kwargs = dict(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False,
)

# Img2img pipeline conditioned by the depth ControlNet.
pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
    controlnet=controlnet_depth,
    **_shared_pipeline_kwargs,
)

# Img2img pipeline conditioned by the Canny ControlNet.
pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
    controlnet=controlnet_canny,
    **_shared_pipeline_kwargs,
)

@spaces.GPU
def process_canny_condition(image, canny_threods=(100, 200)):
    """Build a three-channel Canny edge image to use as the ControlNet condition.

    Args:
        image: NumPy image array passed to ``cv2.Canny``
            (presumably 8-bit H x W x C — verify against the caller).
        canny_threods: Indexable pair ``(low, high)`` holding the two
            thresholds forwarded to ``cv2.Canny``. The default is an
            immutable tuple so it cannot be altered between calls; lists
            are still accepted.

    Returns:
        A ``PIL.Image.Image`` created from the edge map replicated over
        three channels.
    """
    low_threshold, high_threshold = canny_threods[0], canny_threods[1]
    # Run the detector on a copy so the caller's array is left untouched.
    edges = cv2.Canny(image.copy(), low_threshold, high_threshold)
    # Add a trailing channel axis, then repeat it three times: H x W -> H x W x 3.
    edges = edges[:, :, None]
    edges_rgb = np.concatenate([edges, edges, edges], axis=2)
    return Image.fromarray(HWC3(edges_rgb))

# Single MidasDetector instance created at import time; it is reused by
# process_depth_condition_midas on every call.
model_midas = MidasDetector()

@spaces.GPU
def process_depth_condition_midas(img, res=1024):
    """Build a depth-map condition image with the module-level MiDaS detector.

    Args:
        img: NumPy image array. Only its first two dimensions (height, width)
            are read here; channel handling is delegated to ``HWC3``.
        res: Resolution forwarded to ``resize_image`` before detection.

    Returns:
        A ``PIL.Image.Image`` of the detector output, resized back to the
        height and width of ``img``.
    """
    # Read only height/width so a 2-D (single-channel) array does not fail
    # the unpacking before HWC3 gets a chance to normalise the channels.
    h, w = img.shape[:2]
    detector_input = resize_image(HWC3(img), res)
    depth = HWC3(model_midas(detector_input))
    # cv2.resize expects the target size as (width, height).
    depth = cv2.resize(depth, (w, h))
    return Image.fromarray(depth)

# Largest value of a signed 32-bit integer; upper bound for both the random
# seed drawn in infer() and the "Seed" slider.
MAX_SEED = np.iinfo(np.int32).max
# Resolution passed to resize_image() and to the depth preprocessor in infer().
MAX_IMAGE_SIZE = 1024

@spaces.GPU
def infer(prompt,
          image=None,
          controlnet_type="Depth",
          negative_prompt="",
          seed=0,
          randomize_seed=False,
          guidance_scale=6.0,
          num_inference_steps=50,
          controlnet_conditioning_scale=0.7,
          control_guidance_end=0.9,
          strength=1.0
        ):
    """Run ControlNet-guided img2img generation for one input image.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Input image; it is resized with ``resize_image`` and used both
            as the img2img source and as the source of the condition image.
        controlnet_type: ``"Depth"`` or ``"Canny"``; selects the pipeline and
            the matching condition preprocessor.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        controlnet_conditioning_scale: Forwarded to the pipeline.
        control_guidance_end: Forwarded to the pipeline.
        strength: Forwarded to the pipeline.

    Returns:
        ``[condition_image, generated_image]``, or ``None`` when
        ``controlnet_type`` is neither ``"Depth"`` nor ``"Canny"``.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Fail early with a readable message instead of crashing inside resize_image.
    if image is None:
        raise gr.Error("Please upload an input image before running.")
    # Reject unsupported types before doing any resizing or GPU work.
    if controlnet_type not in ("Depth", "Canny"):
        return None

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed needs an integer; UI widgets may hand over a float.
    generator = torch.Generator().manual_seed(int(seed))

    init_image = resize_image(image, MAX_IMAGE_SIZE)
    control_source = np.array(init_image)
    if controlnet_type == "Depth":
        pipe = pipe_depth.to(device)
        condi_img = process_depth_condition_midas(control_source, MAX_IMAGE_SIZE)
    else:
        pipe = pipe_canny.to(device)
        condi_img = process_canny_condition(control_source)

    generated = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    # Condition image first, generated image second.
    return [condi_img, generated]
# Example rows handed to gr.Examples in the UI below; currently empty.
examples = [

]

# Page-level CSS for the two layout columns (ids col-left / col-right):
# both are centered horizontally and given a maximum width.
css="""
#col-left {
    margin: 0 auto;
    max-width: 600px;
}
#col-right {
    margin: 0 auto;
    max-width: 750px;
}
"""

def load_description(fp):
    """Return the full text of the file at ``fp``, decoded as UTF-8."""
    with open(fp, encoding='utf-8') as handle:
        return handle.read()

# Gradio UI: input controls in the left column, result gallery in the right
# column, then an examples row; the Run button is wired to infer().
with gr.Blocks(css=css) as Kolors:
    # Header content is read from assets/title.md and rendered through gr.HTML.
    gr.HTML(load_description("assets/title.md"))
    with gr.Row():
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt",
                    lines=2
                )
            with gr.Row():
                # Choices match the controlnet_type values handled by infer().
                controlnet_type = gr.Dropdown(
                    ["Depth", "Canny"],
                    label = "Controlnet",
                    value="Depth"
                )
            with gr.Row():
                image = gr.Image(label="Image", type="pil")
            with gr.Accordion("Advanced Settings", open=False):
                # Default negative prompt (Chinese), roughly: nsfw, facial
                # shadows, low resolution, jpeg artifacts, blurry, bad,
                # dark face, neon lights.
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter a negative prompt",
                    visible=True,
                    value="nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯"
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                # Checked by default, so infer() draws a random seed unless
                # the user unticks it.
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=6.0,
                    )
                    # NOTE: the UI default here is 30, while infer()'s own
                    # default for num_inference_steps is 50.
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=30,
                    )
                with gr.Row():
                    controlnet_conditioning_scale = gr.Slider(
                        label="Controlnet Conditioning Scale",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.7,
                    )
                    control_guidance_end = gr.Slider(
                        label="Control Guidance End",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.9,
                    )
                with gr.Row():
                    strength = gr.Slider(
                        label="Strength",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=1.0,
                    )
            with gr.Row():
                run_button = gr.Button("Run")
            
        with gr.Column(elem_id="col-right"):
            # infer() returns [condition image, generated image]; the gallery
            # is configured with two columns.
            result = gr.Gallery(label="Result", show_label=False, columns=2)
    
    with gr.Row():
        # Examples supply only the first three infer() arguments; the rest
        # fall back to infer()'s defaults.
        gr.Examples(
                fn = infer,
                examples = examples,
                inputs = [prompt, image, controlnet_type],
                outputs = [result]
            )

    # The order of `inputs` follows infer()'s positional parameter order.
    run_button.click(
        fn = infer,
        inputs = [prompt, image, controlnet_type, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
        outputs = [result]
    )

# Enable the Gradio request queue on the app, then start it with debug=True.
Kolors.queue().launch(debug=True)