"""Gradio web demo for a fine-tuned chat model (DigitalWDF).

Loads a half-precision checkpoint onto the GPU at import time and serves a
simple chat UI that fake-streams the model's reply character by character.
"""
import sys
import time

import gradio as gr
from transformers import AutoModel, AutoTokenizer

sys.path.append("src")
from src import load_pretrained, ModelArguments

# NOTE(review): the original had `if __name__ == "__main__":` commented out,
# so everything below runs at import time. Kept as-is so existing launch
# commands keep working.
model_args = ModelArguments(checkpoint_dir="combined")
model, tokenizer = load_pretrained(model_args)
model = model.half().cuda()  # fp16 inference — assumes a CUDA device is available
model = model.eval()


def predict(input, history=None):
    """Generate the model's reply and stream it into the chat history.

    Args:
        input: the user's latest message. (Name shadows the builtin but is
            kept for backward compatibility; gradio binds it positionally.)
        history: chatbot history as [[user_msg, bot_msg], ...]; the last
            row's bot slot is filled in incrementally.

    Yields:
        The updated history after each appended character, so the UI appears
        to stream the answer.
    """
    if history is None:
        history = []
    response, _ = model.chat(tokenizer, input, history)
    # Guard: the `user` callback normally appends [input, None] before this
    # runs, but if predict is ever invoked with an empty history, create the
    # row instead of raising IndexError on history[-1].
    if not history:
        history.append([input, None])
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)  # artificial delay to simulate token streaming
        yield history


with gr.Blocks() as demo:
    gr.Markdown('''## DigitalWDF - unofficial demo ''')
    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=200)

    def user(user_message, history):
        """Append the user's message to the history with an empty bot slot."""
        return history + [[user_message, None]]

    with gr.Row():
        with gr.Column(scale=4):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)
        with gr.Column(scale=1):
            button = gr.Button("Generate")

    # Enter-key submit and the Generate button share the same pipeline:
    # first record the user turn, then stream the model's reply into it.
    txt.submit(user, [txt, chatbot], chatbot, queue=False).then(
        predict, [txt, chatbot], chatbot
    )
    button.click(user, [txt, chatbot], chatbot, queue=False).then(
        predict, [txt, chatbot], chatbot
    )

demo.queue().launch()