import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fix the seed so generation is reproducible across runs.
torch.random.manual_seed(0)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding: with do_sample=False the model always picks the most
# likely next token (a temperature setting would be ignored here).
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}


def chat(message):
    # Seed the conversation with a short greeting exchange, then append
    # the user's actual message.
    messages = [
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello! How may I help you?"},
        {"role": "user", "content": message},
    ]
    output = pipe(messages, **generation_args)
    return output[0]["generated_text"]


description = """

Phi-3-mini-128k-instruct

This Q&A chatbot is based on the Phi-3-mini-128k-instruct model by Microsoft.

Feel free to ask any questions or start a conversation!

""" #demo = gr.ChatInterface(chat, description=description).queue() demo= gr.Interface(fn=chat, inputs="textbox", outputs="textbox",description=description) demo.launch()