"""Gradio chat demo for a local quantized OpenChat 3.5 model served via GPT4All.

Downloads the GGUF weights from the Hugging Face Hub on first run, then wires a
Blocks UI (chat history, sampling sliders) to a manual prompt-formatting loop.
"""

import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download

repo_id = "TheBloke/openchat_3.5-GGUF"
model_name = "openchat_3.5.Q4_K_M.gguf"

# Fetch the quantized model file into ./models (no-op if already cached there).
hf_hub_download(
    repo_id=repo_id,
    filename=model_name,
    local_dir="models",
    local_dir_use_symlinks=False,
)

# FIX: was `model = model = GPT4All(...)` — duplicated assignment target.
# Load strictly from the local directory; never re-download at construction.
model = GPT4All(model_name, "models", allow_download=False, device="cpu")
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = ""
# We format the multi-turn prompt ourselves, so disable GPT4All's own
# chat-session bookkeeping (private flag — may break on gpt4all upgrades).
model._is_chat_session_activated = False

max_new_tokens = 2048


def predict(input, instruction, history, temperature, top_p, top_k, max_tokens):
    """Generate one assistant reply for `input` given the chat `history`.

    Args:
        input: The new user message.
        instruction: System-style instruction text (currently unused by the
            prompt builder — kept for UI compatibility).
        history: List of [user_message, assistant_message] pairs, or None.
        temperature, top_p, top_k: Sampling parameters forwarded to the model.
        max_tokens: Cap on newly generated tokens.

    Returns:
        (result, input, history): the reply text, the echoed input, and the
        history extended with the new [input, result] turn.
    """
    history = history or []

    # Rebuild the full conversation in the model's [INST] ... [/INST] format.
    formatted_input = ""
    for user_message, assistant_message in history:
        formatted_input += model.config["promptTemplate"].format(user_message)
        # FIX: dropped dead `+ ""` no-op concatenation.
        formatted_input += assistant_message
    formatted_input += model.config["promptTemplate"].format(input)

    # FIX: the original passed kwargs GPT4All.generate() does not define
    # (`max_new_tokens`, `do_sample`, `stream`, `details`, `return_full_text`
    # belong to the HF text-generation API). Use the supported names so the
    # token cap from the slider is actually honored.
    result = model.generate(
        formatted_input,
        temp=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        top_k=top_k,
        streaming=False,
    )

    history = history + [[input, result]]
    return (result, input, history)


def clear_history():
    """Reset the chat: log the action and return an empty history list."""
    print(">>> HISTORY CLEARED!")
    return []


with gr.Blocks() as demo:
    with gr.Row(variant="panel"):
        gr.Markdown("A AI model test demo.")

    with gr.Row():
        with gr.Column():
            history = gr.Chatbot(label="History", elem_id="chatbot")
            input = gr.Textbox(label="Input", value="Hi.", lines=2)
            instruction = gr.Textbox(
                label="Instruction",
                value="You are an AI chatbot.",
                lines=4,
            )
            run = gr.Button("▶")
            clear = gr.Button("🗑️")
        with gr.Column():
            temperature = gr.Slider(
                minimum=0,
                maximum=2,
                value=1,
                step=0.01,
                interactive=True,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.01,
                maximum=0.99,
                value=0.95,
                step=0.01,
                interactive=True,
                label="Top P",
            )
            top_k = gr.Slider(
                minimum=1,
                maximum=2048,
                value=50,
                step=1,
                interactive=True,
                label="Top K",
            )
            max_tokens = gr.Slider(
                minimum=1,
                maximum=2048,
                value=32,
                step=64,
                interactive=True,
                label="Max New Tokens",
            )

    with gr.Row():
        with gr.Column():
            output = gr.Textbox(label="Output", value="", lines=50)

    run.click(
        predict,
        inputs=[input, instruction, history, temperature, top_p, top_k, max_tokens],
        outputs=[output, input, history],
    )
    clear.click(clear_history, [], history)

# Gradio 3.x queue API (`concurrency_count` was removed in Gradio 4).
demo.queue(concurrency_count=500, api_open=True).launch(show_api=True)