"""Gradio chat demo backed by a local BlenderBot checkpoint."""

from html import escape

import gradio as gr
import torch
from transformers import BlenderbotForConditionalGeneration, BlenderbotTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "./blenderbot-1B-distill"

tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
model.to(device)

# How many of the most recent utterances are joined into the model prompt.
MAX_CONTEXT_TURNS = 4

# Message endings that reset the conversation.
FAREWELLS = ("bye", "Bye", "bye.", "Bye.")


def _render_history(history):
    """Return the chat transcript as HTML, one ``.msg`` div per utterance.

    The last entry of *history* is expected to be the bot reply, so roles
    are assigned by counting back from the end.  Counting from the start
    would mislabel every message once older turns have been trimmed and the
    list no longer begins with a user message.

    NOTE(review): the markup was reconstructed from the class names used in
    the ``css`` string below (.chatbox / .msg / .user / .bot) - confirm it
    matches the intended layout.
    """
    last = len(history) - 1
    rows = []
    for idx, msg in enumerate(history):
        role = "bot" if (last - idx) % 2 == 0 else "user"
        # Escape the text: it comes from the user and from the model and is
        # inserted into HTML that the browser will render.
        rows.append("<div class='msg {}'>{}</div>".format(role, escape(msg)))
    return "<div class='chatbox'>" + "".join(rows) + "</div>"


def get_reply(response, history=None):
    """Generate the bot's answer to *response* and render the conversation.

    Args:
        response: The text the user typed into the Gradio textbox.
        history: Alternating user/bot utterances carried in the Gradio
            "state" slot.  ``None`` is treated as an empty conversation.

    Returns:
        A ``(html, history)`` tuple: the rendered transcript and the updated
        list of utterances to store back into the state.  When *response*
        ends with a farewell, the transcript is replaced by a restart notice
        and the history is reset to an empty list.
    """
    # Work on a copy so neither a shared default nor the caller's list is
    # mutated between requests.
    history = [] if history is None else list(history)
    history.append(response)

    if response.endswith(FAREWELLS):
        return "<div class='chatbox'>Chatbot restarted</div>", []

    # Only the most recent utterances are fed to the model as context.
    history = history[-MAX_CONTEXT_TURNS:]

    inputs = tokenizer(" ".join(history), return_tensors="pt")
    inputs.to(device)
    outputs = model.generate(**inputs)
    # Drop the first and last generated ids before decoding (presumably the
    # BOS/EOS special tokens - confirm against the tokenizer).
    reply = tokenizer.decode(outputs[0][1:-1]).strip()
    history.append(reply)

    return _render_history(history), history


css = """
.chatbox {display:flex;flex-direction:column}
.msg {padding:4px;margin-bottom:4px;border-radius:4px;width:80%}
.msg.user {background-color:cornflowerblue;color:white}
.msg.bot {background-color:lightgray;align-self:self-end}
.footer {display:none !important}
"""

gr.Interface(
    fn=get_reply,
    theme="default",
    inputs=[gr.inputs.Textbox(placeholder="How are you?"), "state"],
    outputs=["html", "state"],
    enable_queue=True,
    css=css,
).launch(debug=True)