"""Gradio chat UI backed by microsoft/Phi-3-mini-128k-instruct."""
import time  # NOTE(review): unused here; kept in case deployment tooling relies on it

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fixed seed for reproducible generation across restarts.
torch.random.manual_seed(0)

# Single source of truth for the checkpoint id (was repeated twice).
MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",       # let transformers pick fp16/bf16 when supported
    trust_remote_code=True,   # Phi-3 ships custom modeling code
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding: with do_sample=False the temperature value is inert.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}


def chat(message, history):
    """Generate a reply to *message*, conditioned on the conversation so far.

    Bug fix: the original ignored ``history`` and re-sent a hard-coded
    two-turn greeting on every call, so the model had no memory of the
    conversation. We now rebuild the chat transcript from ``history``.

    Args:
        message: The user's latest message (str).
        history: Prior turns as supplied by ``gr.ChatInterface`` — by
            default a list of ``(user, assistant)`` pairs.
            NOTE(review): if the app is later switched to
            ``type="messages"``, this unpacking must be updated.

    Returns:
        The model's generated reply (str).
    """
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        # The assistant slot can be None for an in-flight turn.
        if assistant_turn is not None:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    output = pipe(messages, **generation_args)
    reply = output[0]['generated_text']
    print(reply)  # server-side trace of each generated reply
    return reply


description = """

Phi-3-mini-128k-instruct Chatbot

This chatbot is based on the Phi-3-mini-128k-instruct model by Microsoft.

Feel free to ask any questions or start a conversation!

"""

demo = gr.ChatInterface(chat, description=description).queue()

if __name__ == "__main__":
    demo.launch()