"""Gradio chat front end for the ``epfl-llm/meditron-7b`` text-generation model."""

import time

import gradio as gr
from transformers import Conversation, pipeline

# Value passed to the pipeline as ``max_length`` by chat_bot.
# NOTE(review): per the transformers generation docs ``max_length`` bounds
# prompt + generated tokens together; consider ``max_new_tokens`` if long
# prompts must still receive an answer.
MAX_LENGTH = 500

chatbot = pipeline("text-generation", model="epfl-llm/meditron-7b", use_auth_token=True)
# Alternative pipeline kept for reference:
# chatbot = pipeline("translation", model="facebook/mbart-large-50-many-to-many-mmt", use_auth_token=True)

# Module-level buffers handed to every Conversation built by vanilla_chatbot.
message_list = []
response_list = []

print("START")


def _report_elapsed(start: float) -> None:
    """Print the seconds elapsed since *start* (a ``time.perf_counter()`` value)."""
    print(f"Answer in {time.perf_counter() - start:5.1f} secs ")


def vanilla_chatbot(message, history):
    """Answer *message* by running a ``Conversation`` through the pipeline.

    Not wired into the UI below; kept as an alternative handler.

    Args:
        message: Text of the new user turn.
        history: Chat history supplied by the caller; unused here, the
            module-level ``message_list`` / ``response_list`` are passed to
            the ``Conversation`` instead.

    Returns:
        The last entry of the conversation's ``generated_responses``.
    """
    # NOTE(review): ``chatbot`` is built as a "text-generation" pipeline;
    # confirm it accepts Conversation objects with the installed
    # transformers version before enabling this handler.
    start = time.perf_counter()
    print("start chat")
    conversation = Conversation(
        text=message,
        past_user_inputs=message_list,
        generated_responses=response_list,
    )
    conversation = chatbot(conversation)
    to_return = conversation.generated_responses[-1]
    _report_elapsed(start)
    return to_return


def chat_bot(message, history):
    """Generate a reply to *message* with the text-generation pipeline.

    Args:
        message: Prompt text entered by the user.
        history: Chat history supplied by ``gr.ChatInterface``; unused, so
            every request is answered without earlier turns.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    start = time.perf_counter()
    print("start chat")
    # NOTE(review): with default pipeline settings ``generated_text``
    # presumably starts with the prompt itself; pass
    # ``return_full_text=False`` if only the continuation should be shown.
    to_return = chatbot(message, max_length=MAX_LENGTH)[0]["generated_text"]
    _report_elapsed(start)
    return to_return


demo_chatbot = gr.ChatInterface(
    chat_bot,
    title="Check medical chatbot",
    description="Enter question",
)

demo_chatbot.launch()