import gradio as gr
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

def load_model(model_path, dtype):
    """Load a T5 conditional-generation model at the requested precision.

    Args:
        model_path: Identifier or path handed to ``from_pretrained``.
        dtype: Precision selector, either ``"fp32"`` or ``"fp16"``.

    Returns:
        The ``T5ForConditionalGeneration`` instance created by
        ``from_pretrained`` with the matching ``torch_dtype``.

    Raises:
        ValueError: If ``dtype`` is neither ``"fp32"`` nor ``"fp16"``.
    """
    # Reject unknown precision names before touching the model weights.
    if dtype not in ("fp32", "fp16"):
        raise ValueError("Invalid dtype. Only 'fp32' or 'fp16' are supported.")

    precision = torch.float32 if dtype == "fp32" else torch.float16
    return T5ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=precision,
    )

# Process-wide caches: loading the tokenizer and the model weights is by far
# the most expensive step, so it is done once per configuration instead of on
# every chat message.
_TOKENIZER_CACHE = {}
_MODEL_CACHE = {}


def _get_tokenizer(name="google/flan-t5-small"):
    """Return the tokenizer for *name*, loading it on first use only."""
    if name not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[name] = T5Tokenizer.from_pretrained(name)
    return _TOKENIZER_CACHE[name]


def _get_model(model_path, dtype, device):
    """Return the model for (model_path, dtype, device), loading it once."""
    key = (model_path, dtype, device)
    if key not in _MODEL_CACHE:
        # nn.Module.to() moves the module in place and returns it.
        _MODEL_CACHE[key] = load_model(model_path, dtype).to(device)
    return _MODEL_CACHE[key]


def generate(
    prompt,
    history,
    max_new_tokens,
    repetition_penalty,
    temperature,
    top_p,
    top_k,
    seed,
    model_path="roborovski/superprompt-v1",
    dtype="fp16",
):
    """Generate an expanded prompt with a seeded, sampled T5 decode.

    Args:
        prompt: The user's message.
        history: Chat history supplied by the caller; it is formatted into
            the model input together with ``prompt``.
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Penalty forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k cutoff forwarded to ``model.generate``.
        seed: Seed applied through ``torch.manual_seed`` before sampling.
        model_path: Model identifier passed to ``load_model``.
        dtype: Precision name passed to ``load_model`` (``"fp32"``/``"fp16"``).

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.

    Raises:
        ValueError: Propagated from ``load_model`` for an unsupported dtype.
    """
    if torch.cuda.is_available():
        device = "cuda"
        print("Using GPU")
    else:
        device = "cpu"
        print("Using CPU")

    tokenizer = _get_tokenizer()
    model = _get_model(model_path, dtype, device)

    # NOTE(review): ``history`` is interpolated as-is, so its Python repr
    # (e.g. "[]" for an empty list) becomes part of the model input. Kept
    # unchanged to preserve the existing prompt format — confirm intent.
    input_text = f"{prompt}, {history}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

    # UI widgets may deliver whole numbers as int and fractional ones as
    # float; the generation utilities type-check these arguments, so coerce
    # each to the numeric type it is expected to have.
    torch.manual_seed(int(seed))
    outputs = model.generate(
        input_ids,
        max_new_tokens=int(max_new_tokens),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
        top_k=int(top_k),
    )

    # Strip special tokens so padding/end-of-sequence markers do not leak
    # into the text returned to the chat.
    better_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return better_prompt

# Extra controls shown under the chat box. gr.ChatInterface passes their
# values to the chat function positionally, after the message and history,
# in the order listed here.
additional_inputs = [
    gr.Slider(
        value=512,
        minimum=250,
        maximum=512,
        step=1,
        interactive=True,
        label="Max New Tokens",
        info="The maximum numbers of new tokens, controls how long is the output",
    ),
    gr.Slider(
        value=1.2,
        # The repetition penalty must be strictly positive, so 0 is excluded.
        minimum=0.05,
        maximum=2,
        step=0.05,
        interactive=True,
        label="Repetition Penalty",
        info="Penalize repeated tokens, making the AI repeat less itself",
    ),
    gr.Slider(
        value=0.5,
        # Sampling temperature must be strictly positive, so 0 is excluded.
        minimum=0.05,
        maximum=1,
        step=0.05,
        interactive=True,
        label="Temperature",
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        value=1,
        minimum=0,
        # top_p is a probability mass; values above 1 are rejected at
        # generation time.
        maximum=1,
        step=0.05,
        interactive=True,
        label="Top P",
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        value=1,
        minimum=1,
        maximum=100,
        step=1,
        interactive=True,
        label="Top K",
        info="Higher k means more diverse outputs by considering a range of tokens",
    ),
    gr.Number(
        value=42,
        interactive=True,
        label="Seed",
        info="A starting point to initiate the generation process",
    ),
    gr.Radio(
        choices=["fp32", "fp16"],
        value="fp16",
        label="Model Precision",
        info="Select the precision of the model: fp32 or fp16",
    ),
]

# One example row: the message followed by one entry per additional input
# (seven in total). None leaves the corresponding control without an example
# value.
examples = [
    [
        "Expand the following prompt to add more detail: A storefront with 'Text to Image' written on it.",
        None,
        None,
        None,
        None,
        None,
        None,
        "fp16",
    ]
]


def _chat_generate(
    prompt,
    history,
    max_new_tokens,
    repetition_penalty,
    temperature,
    top_p,
    top_k,
    seed,
    dtype,
):
    """Adapt the UI's positional inputs to generate()'s signature.

    The last additional input is the precision radio. Passed positionally to
    generate() it would land in ``model_path``, so it is forwarded by keyword
    as ``dtype`` and the default model path is left in effect.
    """
    return generate(
        prompt,
        history,
        max_new_tokens,
        repetition_penalty,
        temperature,
        top_p,
        top_k,
        seed,
        dtype=dtype,
    )


gr.ChatInterface(
    fn=_chat_generate,
    chatbot=gr.Chatbot(
        show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"
    ),
    additional_inputs=additional_inputs,
    title="SuperPrompt-v1",
    description="Make your prompts more detailed!",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)