"""Gradio chat demo for the THUDM/chatglm2-6b model.

Loads the tokenizer and model at import time, then builds and launches a
small Blocks UI with a chat window, a text box and a "Generate" button.
"""

from transformers import AutoModel, AutoTokenizer

import gradio as gr

# NOTE(review): trust_remote_code=True executes Python code shipped with the
# model repository; keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
# .half().cuda() are called on the loaded model, so a CUDA device is required.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
model = model.eval()

# Header shown above the chat window. Paragraphs are separated by blank lines
# so that only the first line is rendered as the "###" heading.
HEADER_MARKDOWN = "\n\n".join(
    [
        "### ChatGLM2-6B---chatbot demo",
        "Glm-130b: An open bilingual pre-trained model",
        "Author: Zeng, Aohan and Liu, Xiao and Du, Zhengxiao and Wang, Zihan "
        "and Lai, Hanyu and Ding, Ming and Yang, Zhuoyi and Xu, Yifan "
        "and Zheng, Wendi and Xia, Xiao and others",
        "Paper: arXiv preprint arXiv:2210.02414",
    ]
)


def beginchat(input, history=None):
    """Send one user message to the model and return the updated history.

    Args:
        input: Text typed by the user. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)
        history: Conversation history from previous turns, or ``None`` to
            start a new conversation. Presumably a list of (query, response)
            pairs as produced by ``model.chat`` -- confirm against the model.

    Returns:
        A ``(history, history)`` pair: the same updated history twice, once
        for the Chatbot display and once for the session state.
    """
    if history is None:
        history = []

    # Skip the model call for empty / whitespace-only (or missing) input.
    if not input or not input.strip():
        return history, history

    # model.chat returns (response, updated_history); only the history is used
    # because it is what both outputs consume.
    _response, history = model.chat(tokenizer, input, history)
    return history, history


with gr.Blocks() as chatglm2bot:
    gr.Markdown(HEADER_MARKDOWN)

    state = gr.State([])
    chatbot = gr.Chatbot([], elem_id="chatbot")

    with gr.Row():
        with gr.Column(scale=4):
            # container=False is passed to the constructor; the older
            # .style(container=False) call is deprecated/removed in Gradio.
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
            )
        with gr.Column(scale=1):
            button = gr.Button("Generate")

    # Pressing Enter in the text box and clicking the button do the same thing.
    txt.submit(beginchat, [txt, state], [chatbot, state])
    button.click(beginchat, [txt, state], [chatbot, state])

chatglm2bot.queue().launch()