import json
import random
import uuid

import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda:0")

# Blossom LLM: rewrites and expands the user's request into an English image prompt.
llm = AutoModelForCausalLM.from_pretrained("Azure99/blossom-v5.1-9b",
                                           torch_dtype=torch.float16,
                                           device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("Azure99/blossom-v5.1-9b")

# Playground v2.5 diffusion pipeline: generates the images.
diffusion_pipe = DiffusionPipeline.from_pretrained(
    "playgroundai/playground-v2.5-1024px-aesthetic",
    torch_dtype=torch.float16,
    use_safetensors=True,
    add_watermarker=False,
    variant="fp16",
).to(device)


def get_input_ids(inst, bot_prefix):
    # Wrap the instruction in Blossom's chat template and pre-fill the bot's reply
    # with bot_prefix so the model continues the JSON instead of starting from scratch.
    return tokenizer.encode("A chat between a human and an artificial intelligence bot. "
                            "The bot gives helpful, detailed, and polite answers to the human's questions.\n"
                            f"|Human|: {inst}\n|Bot|: {bot_prefix}",
                            add_special_tokens=True)


def save_image(img):
    unique_name = str(uuid.uuid4()) + ".png"
    img.save(unique_name)
    return unique_name


# Few-shot prompt (in Chinese). It instructs the LLM to extract the image description
# (description) from the user's request, translate it to English (en_description), and
# expand it with additional detail (expanded_description), returning only a JSON object
# with those three string fields and no extra commentary.
LLM_PROMPT = '''你的任务是从输入的[作画要求]中抽取画面描述(description),然后将description翻译为英文(en_description),最后对en_description进行扩写(expanded_description),增加足够多的细节,且符合人类的第一直觉。
[输出]是一个json,包含description、en_description、expanded_description三个字符串字段,请直接输出一个完整的json,不要输出任何解释或其他无关内容。
下面是一些示例:
[作画要求]->"画一幅画:落霞与孤鹜齐飞,秋水共长天一色。"
[输出]->{"description": "落霞与孤鹜齐飞,秋水共长天一色", "en_description": "The setting sun and the solitary duck fly together, the autumn water shares a single hue with the vast sky", "expanded_description": "A lone duck gracefully gliding across the tranquil surface of a shimmering lake, bathed in the warm golden glow of the setting sun, creating a breathtaking scene of natural beauty and tranquility."}
[作画要求]->"原神中的可莉"
[输出]->{"description": "原神中的可莉", "en_description": "Klee in Genshin Impact", "expanded_description": "An artistic portrait of Klee from Genshin Impact, standing in a vibrant meadow with colorful explosions of her elemental abilities in the background."}
[作画要求]->"create an image for me. a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone"
[输出]->{"description": "a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone", "en_description": "a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone", "expanded_description": "A close-up portrait of an elegant woman with rich brown skin, wearing a stunning transparent, prismatic, and intricately detailed Nemes headdress, striking a confident and alluring over-the-shoulder pose."}
[作画要求]->"一只高贵的柯基犬,素描画风格\n根据上面的描述生成一张图片吧!"
[输出]->{"description": "一只高贵的柯基犬,素描画风格", "en_description": "A noble corgi dog, sketch style", "expanded_description": "A majestic corgi with a regal bearing, depicted in a detailed and intricate pencil sketch, capturing the essence of its noble lineage and dignified presence."}
[作画要求]->$USER_PROMPT
[输出]->'''

BOT_PREFIX = '{"description": "'


@spaces.GPU(enable_queue=True)
def generate(
        prompt: str,
        progress=gr.Progress(track_tqdm=True),
):
    # Fill the user's request into the few-shot prompt and build the chat input ids.
    input_ids = get_input_ids(LLM_PROMPT.replace("$USER_PROMPT", json.dumps(prompt, ensure_ascii=False)), BOT_PREFIX)
    generation_kwargs = dict(input_ids=torch.tensor([input_ids]).to(llm.device),
                             do_sample=True,
                             max_new_tokens=512,
                             temperature=0.5,
                             top_p=0.85,
                             top_k=50,
                             repetition_penalty=1.05)
    llm_result = llm.generate(**generation_kwargs)
    # Strip the prompt tokens and prepend the pre-filled prefix to recover the full JSON string.
    llm_result = llm_result.cpu()[0][len(input_ids):]
    llm_result = BOT_PREFIX + tokenizer.decode(llm_result, skip_special_tokens=True)
    print("----------")
    print(prompt)
    print(llm_result)

    # Fall back to the raw user prompt if the LLM output cannot be parsed as JSON.
    en_prompt = prompt
    expanded_prompt = prompt
    try:
        llm_output = json.loads(llm_result)
        en_prompt = llm_output["en_description"]
        expanded_prompt = llm_output["expanded_description"]
    except Exception:
        print("error, falling back to original prompt")

    seed = random.randint(0, 2147483647)
    generator = torch.Generator().manual_seed(seed)
    # Generate one image from the expanded prompt and one from the plain English translation.
    images = diffusion_pipe(
        prompt=[expanded_prompt, en_prompt],
        negative_prompt=None,
        width=1024,
        height=1024,
        guidance_scale=3,
        num_inference_steps=25,
        generator=generator,
        num_images_per_prompt=1,
        use_resolution_binning=True,
        output_type="pil",
    ).images

    image_paths = [save_image(img) for img in images]
    return image_paths


css = '''
.gradio-container{max-width: 560px !important}
h1{text-align:center}
'''

with gr.Blocks(css=css) as demo:
    gr.Markdown("# Blossom & Playground v2.5")
    with gr.Group():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)
        result = gr.Gallery(label="Result", columns=2, rows=1, show_label=False)

    gr.on(
        triggers=[
            prompt.submit,
            run_button.click,
        ],
        fn=generate,
        inputs=[
            prompt,
        ],
        outputs=[result],
        api_name="run",
    )

if __name__ == "__main__":
    demo.queue(max_size=20).launch()