from transformers import AutoModel, AutoTokenizer
import gradio as gr
# Load the ChatGLM2-6B tokenizer and model once, at import time.
# trust_remote_code=True is passed to both loaders; it permits code shipped
# in the model repository to run during loading.
tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm2-6b",
    trust_remote_code=True,
)
# Cast to half precision, move to the CUDA device, and switch to eval mode
# (inference only -- nothing in this script trains the model).
model = (
    AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)

def beginchat(input, history=None):
    """Run one chat turn through the model and return the updated transcript.

    Args:
        input: The user's message text. (The name shadows the builtin but is
            kept because it is part of the function's signature.)
        history: Conversation so far, as previously returned by this function;
            ``None`` starts a fresh conversation.

    Returns:
        A pair ``(history, history)`` -- the same transcript twice, so one
        value can feed the chat display and the other the session state.
        When ``input`` is empty or only whitespace the transcript is returned
        unchanged and the model is not called.
    """
    if history is None:
        history = []
    # A blank message carries no prompt: skip generation instead of spending
    # a model call and appending an empty user turn to the transcript.
    if not input or not input.strip():
        return history, history
    # model.chat returns (response, updated_history); only the history is used.
    _, history = model.chat(tokenizer, input, history)
    return history, history


# Build the demo page: header text, chat transcript, and an input row.
with gr.Blocks() as chatglm2bot:
    gr.Markdown('''### ChatGLM2-6B---chatbot demo
    
    Glm-130b: An open bilingual pre-trained model
    Author: Zeng, Aohan and Liu, Xiao and Du, Zhengxiao and Wang, Zihan and Lai, Hanyu and Ding, Ming and Yang, Zhuoyi and Xu, Yifan and Zheng, Wendi and Xia, Xiao and others
    Paper: arXiv preprint arXiv:2210.02414''')

    # Conversation history held in session state, starting empty.
    state = gr.State(value=[])
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")

    # Input row: wide text field (scale 4) next to a narrow button (scale 1).
    with gr.Row():
        with gr.Column(scale=4):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)
        with gr.Column(scale=1):
            button = gr.Button(value="Generate")

    # Pressing Enter in the textbox and clicking the button run the same
    # handler with the same inputs and outputs.
    for register in (txt.submit, button.click):
        register(fn=beginchat, inputs=[txt, state], outputs=[chatbot, state])

# Enable request queuing, then start the web server.
chatglm2bot.queue().launch()