import os
import random

import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageFilter
from transformers import CLIPTextModel

from diffusers import UniPCMultistepScheduler
from model.BrushNet_CA import BrushNetModel
from model.diffusers_c.models import UNet2DConditionModel
from pipeline.pipeline_PowerPaint_Brushnet_CA import StableDiffusionPowerPaintBrushNetPipeline
from utils.utils import TokenizerWrapper, add_tokens


# Local checkout directory for the PowerPaint v2 weights repository.
base_path = "./PowerPaint_v2"

# Make sure git and git-lfs are present, then fetch the weights at startup.
# `-y` answers apt's confirmation prompt, which would otherwise stop the
# install when the process has no interactive terminal.
os.system("apt install -y git")
os.system("apt install -y git-lfs")
os.system(f"git lfs clone https://code.openxlab.org.cn/zhuangjunhao/PowerPaint_v2.git {base_path}")
# Each os.system call runs in its own shell, so this `cd` only applies to the
# `git lfs pull` chained after it and never changes this process's directory.
os.system(f"cd {base_path} && git lfs pull")
# The script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)
context_prompt = ""
context_negative_prompt = ""
base_model_path = "./PowerPaint_v2/realisticVisionV60B1_v51VAE/"
dtype = torch.float16

# Stable Diffusion 1.5 components: the UNet is used as the template for the
# BrushNet branch, and the text encoder becomes BrushNet's own text encoder.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", revision=None, torch_dtype=dtype
)
text_encoder_brushnet = CLIPTextModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="text_encoder", revision=None, torch_dtype=dtype
)
brushnet = BrushNetModel.from_unet(unet)

# Module-level pipeline object shared by predict().
pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
    base_model_path,
    brushnet=brushnet,
    text_encoder_brushnet=text_encoder_brushnet,
    torch_dtype=dtype,
    low_cpu_mem_usage=False,
    safety_checker=None,
)
# Replace the pipeline's UNet and tokenizer with the project's own classes,
# both loaded from the base model directory.
pipe.unet = UNet2DConditionModel.from_pretrained(base_model_path, subfolder="unet", revision=None, torch_dtype=dtype)
pipe.tokenizer = TokenizerWrapper(from_pretrained=base_model_path, subfolder="tokenizer", revision=None)
# Register the task placeholder tokens returned by add_task(); each one is
# initialised from the token "a" and spans 10 embedding vectors.
add_tokens(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder_brushnet,
    placeholder_tokens=["P_ctxt", "P_shape", "P_obj"],
    initialize_tokens=["a", "a", "a"],
    num_vectors_per_token=10,
)
from safetensors.torch import load_model


# Load the PowerPaint BrushNet weights into the branch created above.
load_model(pipe.brushnet, "./PowerPaint_v2/PowerPaint_Brushnet/diffusion_pytorch_model.safetensors")

# The text encoder has not been moved off the CPU at this point, so the
# checkpoint is mapped to CPU instead of whatever device it was saved from.
# strict=False tolerates keys that do not match the encoder exactly.
# NOTE(review): torch.load unpickles the file - only use checkpoints from a
# trusted source (consider weights_only=True if the installed torch has it).
pipe.text_encoder_brushnet.load_state_dict(
    torch.load("./PowerPaint_v2/PowerPaint_Brushnet/pytorch_model.bin", map_location="cpu"), strict=False
)

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

pipe.enable_model_cpu_offload()
current_control = "canny"
# controlnet_conditioning_scale = 0.8


def set_seed(seed):
    """Seed the torch (CPU and CUDA), NumPy and Python random generators.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)


def add_task(control_type):
    """Return the placeholder-token prompts for a PowerPaint task.

    Args:
        control_type: Task name. Recognised values are "object-removal",
            "context-aware", "shape-guided" and "image-outpainting"; any
            other value gets the "P_obj" prompts.

    Returns:
        Tuple ``(promptA, promptB, negative_promptA, negative_promptB)``.
    """
    # (task name, (promptA, promptB, negative_promptA, negative_promptB))
    task_prompts = (
        ("object-removal", ("P_ctxt", "P_ctxt", "P_obj", "P_obj")),
        ("context-aware", ("P_ctxt", "P_ctxt", "", "")),
        ("shape-guided", ("P_shape", "P_ctxt", "P_shape", "P_ctxt")),
        ("image-outpainting", ("P_ctxt", "P_ctxt", "P_obj", "P_obj")),
    )
    for task_name, prompts in task_prompts:
        if control_type == task_name:
            return prompts
    return ("P_obj", "P_obj", "P_obj", "P_obj")


def _expand_canvas(image, left_ratio, right_ratio, top_ratio, bottom_ratio, blurry_gap=10):
    """Pad *image* with gray borders for outpainting and build the matching mask.

    Args:
        image: RGB PIL image to place on the enlarged canvas.
        left_ratio: Left border width as a fraction of the image width.
        right_ratio: Right border width as a fraction of the image width.
        top_ratio: Top border height as a fraction of the image height.
        bottom_ratio: Bottom border height as a fraction of the image height.
        blurry_gap: Width in pixels of the band along the inside edge of the
            original image that is also marked for regeneration.

    Returns:
        ``(canvas, mask)`` as PIL RGB images of the same size. The canvas is
        gray (127) outside the pasted image; the mask is white (255) where
        content has to be generated and black (0) elsewhere.
    """
    o_W, o_H = image.size
    c_W = int((1 + left_ratio + right_ratio) * o_W)
    c_H = int((1 + top_ratio + bottom_ratio) * o_H)
    top = int(top_ratio * o_H)
    left = int(left_ratio * o_W)

    canvas = np.full((c_H, c_W, 3), 127, dtype=np.uint8)
    canvas[top:top + o_H, left:left + o_W, :] = np.array(image)

    mask = np.full((c_H, c_W, 3), 255, dtype=np.uint8)
    mask[
        top + blurry_gap:top + o_H - blurry_gap,
        left + blurry_gap:left + o_W - blurry_gap,
        :
    ] = 0

    return Image.fromarray(canvas), Image.fromarray(mask)


def predict(
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    seed,
    negative_prompt,
    task,
    left_expansion_ratio,
    right_expansion_ratio,
    top_expansion_ratio,
    bottom_expansion_ratio,
):
    """Run one PowerPaint pass on an image and return the generated result.

    The image is resized to 1024x1024. When all four expansion ratios are
    given, it is padded with gray borders and the mask is generated to cover
    those borders; otherwise the mask is taken from ``input_image["mask"]``.

    Args:
        input_image: Dict with a PIL image under "image" and, unless all four
            expansion ratios are given, a PIL mask under "mask" (white marks
            the region to regenerate). The dict is not modified.
        prompt: User prompt; task-specific suffixes are appended for the
            "image-outpainting", "context-aware" and "object-removal" tasks.
        fitting_degree: Forwarded to the pipeline as ``tradoff`` and
            ``tradoff_nag``.
        ddim_steps: Number of inference steps.
        scale: Guidance scale.
        seed: Seed for all random generators.
        negative_prompt: User negative prompt.
        task: Task name understood by ``add_task``.
        left_expansion_ratio: Left border as a fraction of the width, or None.
        right_expansion_ratio: Right border as a fraction of the width, or None.
        top_expansion_ratio: Top border as a fraction of the height, or None.
        bottom_expansion_ratio: Bottom border as a fraction of the height, or None.

    Returns:
        ``([result], [mask, overlay])`` where ``result`` is the generated PIL
        image, ``mask`` is the RGB mask that was used and ``overlay`` is the
        result with the masked area tinted red.

    Raises:
        KeyError: If a mask is needed but ``input_image`` has no "mask" entry.
    """
    # Callers such as UI sliders may pass floats; the seeding helpers and the
    # step count need integers.
    seed = int(seed)
    ddim_steps = int(ddim_steps)

    image = input_image["image"].convert("RGB").resize((1024, 1024), Image.LANCZOS)

    if task in ("image-outpainting", "context-aware"):
        prompt = prompt + " empty scene"
    elif task == "object-removal":
        prompt = prompt + " empty scene blur"

    ratios = (left_expansion_ratio, right_expansion_ratio, top_expansion_ratio, bottom_expansion_ratio)
    if all(ratio is not None for ratio in ratios):
        image, mask = _expand_canvas(image, *ratios)
    else:
        mask = input_image["mask"]

    # Normalise the mask to three channels so it lines up with the RGB image
    # whatever mode the mask file was stored in (L, 1, P, RGBA, ...).
    mask = mask.convert("RGB")

    promptA, promptB, negative_promptA, negative_promptB = add_task(task)

    # Trim both dimensions down to a multiple of 8 before running the pipeline.
    width, height = image.size
    width -= width % 8
    height -= height % 8
    image = image.resize((width, height), Image.LANCZOS)
    mask = mask.resize((width, height), Image.LANCZOS)

    # Black out the region that is going to be regenerated.
    mask_weight = np.array(mask) / 255.0
    masked_pixels = np.array(image) * (1 - mask_weight)
    image = Image.fromarray(masked_pixels.astype(np.uint8))

    set_seed(seed)
    result = pipe(
        promptA=promptA,
        promptB=promptB,
        promptU=prompt,
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        image=image,
        mask=mask,
        num_inference_steps=ddim_steps,
        generator=torch.Generator("cuda").manual_seed(seed),
        brushnet_conditioning_scale=1.0,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        negative_promptU=negative_prompt,
        guidance_scale=scale,
        width=width,
        height=height,
    ).images[0]

    # Preview: blend a red layer over the masked area. Dividing by 512 rather
    # than 255 caps the tint at roughly half strength.
    result_np = np.array(result).astype("float")
    red = np.zeros_like(result_np)
    red[:, :, 0] = 180.0
    tint = np.array(mask).astype("float") / 512.0
    result_m = Image.fromarray((result_np * (1 - tint) + tint * red).astype("uint8"))

    dict_res = [mask, result_m]
    dict_out = [result]

    return dict_out, dict_res


import gradio as gr

def custom_infer(input_image_path,
                 input_mask_path=None,
                 prompt="",
                 fitting_degree=0.5,
                 ddim_steps=20,
                 scale=5,
                 seed=143,
                 negative_prompt="",
                 task="text-guided",
                 left_expansion_ratio=0.2,
                 right_expansion_ratio=0.2,
                 top_expansion_ratio=0.2,
                 bottom_expansion_ratio=0.2):
    """Load the input files from disk, run ``predict`` and return the result.

    For "image-outpainting" the mask is derived from the expansion ratios and
    ``input_mask_path`` is ignored. Every other task runs on the supplied mask
    and ignores the expansion ratios.

    Args:
        input_image_path: Path of the image to edit.
        input_mask_path: Path of the mask image; required unless the task is
            "image-outpainting".
        prompt: Text prompt.
        fitting_degree: Forwarded to ``predict``.
        ddim_steps: Number of inference steps.
        scale: Guidance scale.
        seed: Random seed.
        negative_prompt: Negative text prompt.
        task: Task name passed on to ``predict``.
        left_expansion_ratio: Left border as a fraction of the image width.
        right_expansion_ratio: Right border as a fraction of the image width.
        top_expansion_ratio: Top border as a fraction of the image height.
        bottom_expansion_ratio: Bottom border as a fraction of the image height.

    Returns:
        The generated PIL image.

    Raises:
        ValueError: If no input image is given, or a mask is required for the
            task but none is given.
    """
    # Validate before touching the disk or the model so the failure is cheap
    # and the message says what is actually missing.
    if not input_image_path:
        raise ValueError("An input image is required.")
    outpainting = task == "image-outpainting"
    if not outpainting and not input_mask_path:
        raise ValueError(f"Task '{task}' requires an input mask.")

    # convert()/copy() read the pixel data, so the file handles can be closed
    # as soon as the with-blocks exit.
    with Image.open(input_image_path) as image_file:
        input_dict = {"image": image_file.convert("RGB")}

    if outpainting:
        ratios = (left_expansion_ratio, right_expansion_ratio, top_expansion_ratio, bottom_expansion_ratio)
    else:
        with Image.open(input_mask_path) as mask_file:
            input_dict["mask"] = mask_file.copy()
        ratios = (None, None, None, None)

    images, _ = predict(
        input_dict, prompt, fitting_degree, ddim_steps, scale, seed, negative_prompt, task, *ratios
    )
    return images[0]
import gradio as gr

# Gradio input components, listed in the positional order of custom_infer's
# parameters (gr.Interface passes their values positionally).
inputs = [
    gr.Image(label="Input Image", type="filepath"),
    gr.Image(label="Input Mask (optional)", type="filepath"),
    gr.Textbox(label="Prompt", value="A beautiful landscape"),
    # NOTE(review): this value is forwarded to the pipeline as `tradoff`,
    # presumably a blend weight in [0, 1] - confirm against the pipeline.
    # The default matches custom_infer's own default of 0.5.
    gr.Slider(label="Fitting Degree", minimum=0, maximum=1, value=0.5, step=0.05),
    gr.Slider(label="DDIM Steps", minimum=10, maximum=50, value=20, step=1),
    gr.Slider(label="Scale", minimum=1, maximum=20, value=7.5, step=0.1),
    gr.Slider(label="Use Seed", minimum=0, maximum=1300000, value=143, step=1),
    gr.Textbox(label="Negative Prompt", value="blur, low quality"),
    gr.Radio(label="Task", choices=["text-guided", "image-outpainting"], value="image-outpainting"),
    gr.Slider(label="Left Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Right Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Top Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
    gr.Slider(label="Bottom Expansion Ratio", minimum=0, maximum=2, value=0.2, step=0.01),
]

# custom_infer returns a single image.
outputs = [
    gr.Image(label="Output Image"),
]

# Create the Gradio interface
demo = gr.Interface(fn=custom_infer, inputs=inputs, outputs=outputs, title="Inference")

# One request is processed at a time, with at most one more waiting.
# NOTE(review): `concurrency_count` is only accepted by some Gradio releases -
# confirm against the installed version.
demo.queue(concurrency_count=1, max_size=1, api_open=True)
demo.launch(show_api=True)