|
import time |
|
import gradio as gr |
|
|
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
|
# Checkpoint to serve. The original code referenced `model` and `tokenizer`
# without ever defining them (NameError at import time); load them here.
# NOTE(review): the generation_args below match the Phi-3-mini example
# defaults, so that checkpoint is assumed — swap MODEL_ID if this app is
# meant to serve a different model.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",   # use the checkpoint's native dtype
    device_map="auto",    # place on GPU when available, else CPU
)

# Shared text-generation pipeline used by every chat request.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
|
|
|
# Decoding settings applied to every chat turn: greedy (no sampling),
# capped at 500 new tokens, and return only the newly generated text
# rather than echoing the prompt back.
generation_args = dict(
    max_new_tokens=500,
    return_full_text=False,
    temperature=0.0,
    do_sample=False,
)
|
|
|
def chat(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The user's latest message (str).
        history: Prior turns as supplied by gr.ChatInterface — assumed to be
            either (user, assistant) pairs or {"role", "content"} dicts
            depending on the installed gradio version; TODO confirm.

    Returns:
        The model's generated reply text (str).
    """
    # BUG FIX: the original ignored `history` entirely and sent a
    # hard-coded fake exchange ("Hi" / "Hello.. How may I help you?"),
    # so the model never saw the real conversation. Rebuild the
    # chat-template messages from the actual history instead.
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Messages-style history: already role/content shaped.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            # The assistant slot is None while a reply is pending.
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    output = pipe(messages, **generation_args)
    # Debug print removed: returning the text is sufficient for the UI.
    return output[0]["generated_text"]
|
|
|
demo = gr.ChatInterface(chat).queue() |
|
|
|
# Start the Gradio web server only when run as a script (not on import).
if __name__ == "__main__":

    demo.launch()