from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Conversation prefix; <|STK_SP|> is the turn separator the model was trained on.
prologue = """quality: high

[System]
Assistant is a distilled language model trained by the community.<|STK_SP|>

[System]
<|STK_SP|>

[User]"""

tokenizer = AutoTokenizer.from_pretrained("mrsteyk/openchatgpt-neox-125m", use_fast=True)
model = AutoModelForCausalLM.from_pretrained("mrsteyk/openchatgpt-neox-125m")


def chat(inpt, max_new_tokens, top_k, top_p, temperature, repetition_penalty):
    # Append the user message to the prefix and cue the assistant turn.
    inputs = tokenizer(f"{prologue}\n{inpt}<|STK_SP|>\n\n[Assistant]\n", return_tensors="pt").input_ids
    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        eos_token_id=tokenizer.sep_token_id,  # stop generation at the <|STK_SP|> separator
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )
    # Decode only the newly generated tokens, dropping the prompt.
    return tokenizer.batch_decode([i[len(inputs[0]):] for i in outputs], skip_special_tokens=True)[0]


gr.Interface(
    fn=chat,
    inputs=[
        "textbox",                               # user message
        gr.Slider(767, 2048 + 1),                # max_new_tokens
        gr.Slider(0, 100, value=50),             # top_k
        gr.Slider(0, 1, step=0.01, value=0.95),  # top_p
        gr.Slider(0.01, 1, step=0.01, value=1),  # temperature
        gr.Slider(1, 100, step=0.5),             # repetition_penalty
    ],
    outputs=[gr.Textbox(label="Assistant says")],
    examples=[
        ["Hello, I have a question about American history. Who is the current Vice President of the United States?", 767, 50, 0.95, 1, 1],
        ["Hello, I have a question about quantum computing. Can quantum computers solve NP-complete problems in polynomial time?", 767, 50, 0.95, 1, 1],
        ["I'm wondering how to make an apple pie?", 767, 50, 0.95, 1, 1],
        ["Hi, I want to know about the GPT-3 model. Could you provide me some information about it?", 767, 50, 0.95, 1, 1],
        ["Please, help me understand LLMs!", 767, 50, 0.95, 1, 1],
        ["What is the meaning of life?", 767, 50, 0.95, 1, 1],
        ["What is the origin of the word 'sushi'?", 767, 50, 0.95, 1, 1],
        ["What's the difference between a chatbot and an AI?", 767, 50, 0.95, 1, 1],
        ["What's the difference between a monad and a functor in functional programming?", 767, 50, 0.95, 1, 1],
    ],
    cache_examples=False,
).launch()