import gradio as gr
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

# Location handed to ``from_pretrained`` for both the weights and the tokenizer.
model_path = "finetuned_phi2"

# NOTE: trust_remote_code=True allows transformers to execute custom Python
# code shipped with the checkpoint; only point model_path at a trusted source.
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Single text-generation pipeline built once at start-up and reused by every
# chat request. (A leftover smoke test that called ``gen(prompt)`` with an
# undefined ``prompt`` used to live here and raised NameError on start-up.)
gen = pipeline('text-generation', model=model, tokenizer=tokenizer)


def generate(prompt, history, temperature=0.3, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0,):
    """Generate the chatbot's reply to ``prompt`` using the module-level pipeline.

    Args:
        prompt: The latest user message.
        history: Earlier conversation turns passed in by ``gr.ChatInterface``.
            Accepted to satisfy the callback signature; not used.
        temperature: Sampling temperature. Values below 0.01 are raised to
            0.01 because sampling needs a strictly positive temperature.
        max_new_tokens: Upper bound on the number of tokens generated.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens (1.0 = none).

    Returns:
        The newly generated text, without the prompt echoed back.
    """
    # UI controls may deliver numbers as float or str; normalise them here.
    temperature = max(float(temperature), 1e-2)

    # ``seed`` is intentionally absent: it is not a generation argument.
    # Use ``transformers.set_seed`` beforehand if reproducible sampling is needed.
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=int(max_new_tokens),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        # Only return the continuation so the chat bubble does not repeat
        # the user's own message.
        return_full_text=False,
    )

    output = gen(prompt, **generate_kwargs)
    return output[0]['generated_text']

# Chat display widget: custom avatar images, no label, a copy button and
# like/dislike controls on messages, and bubbles that do not span full width.
bbchatbot = gr.Chatbot(
    avatar_images=[".logo/user logo 2.png", ".logo/bot logo.png"],
    show_label=False,
    show_copy_button=True,
    likeable=True,
    bubble_full_width=False,
)

# Chat UI that routes each user message through ``generate`` and renders the
# conversation in the pre-configured ``bbchatbot`` widget.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=bbchatbot,
    title="🧑🏽‍💻Microsoft Phi2 Chatbot🤖",
)

# Turn on request queuing, then start the server with the API page hidden.
demo.queue().launch(show_api=False)