"""Gradio chat UI around a locally fine-tuned Phi-2 text-generation model."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Directory (or hub id) holding the fine-tuned checkpoint.
model_path = "finetuned_phi2"

# trust_remote_code=True executes Python shipped with the checkpoint;
# only point model_path at checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
gen = pipeline("text-generation", model=model, tokenizer=tokenizer)

# NOTE: the former module-level smoke test (`gen(prompt)` followed by a
# print) was removed: `prompt` was never defined at module scope, so the
# script raised NameError before the UI could start.


def generate(
    prompt,
    history,
    temperature=0.3,
    max_new_tokens=512,
    top_p=0.95,
    repetition_penalty=1.0,
):
    """Generate the bot reply for one chat turn.

    Args:
        prompt: The user's latest message; passed to the pipeline as-is.
        history: Previous turns supplied by ``gr.ChatInterface``. Accepted
            because the interface passes it, but not used to build the
            model input.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        The ``generated_text`` field of the pipeline's first result.
    """
    # Clamp the temperature to the same 1e-2 floor the original code used.
    temperature = max(float(temperature), 1e-2)

    # UI widgets may deliver numbers as floats/strings, so coerce them.
    # The old `seed=42` entry was dropped: it is not a generation argument.
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=int(max_new_tokens),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        # Return only the completion so the chat bubble does not echo the
        # user's own message back.
        return_full_text=False,
    )

    output = gen(prompt, **generate_kwargs)
    return output[0]["generated_text"]


bbchatbot = gr.Chatbot(
    avatar_images=[".logo/user logo 2.png", ".logo/bot logo.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=True,
    likeable=True,
)

demo = gr.ChatInterface(
    fn=generate,
    chatbot=bbchatbot,
    title="🧑🏽💻Microsoft Phi2 Chatbot🤖",
)

if __name__ == "__main__":
    # Launch only when run as a script so importing this module (e.g. to
    # reuse `generate` or `demo`) does not start a web server.
    demo.queue().launch(show_api=False)