import torch
import gradio as gr
from transformers import AutoModel, AutoTokenizer

# The int4-quantized ChatGLM2 weights run in half precision on CUDA and
# fall back to full precision (float32) on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
model = model.half().cuda() if device == "cuda" else model.float()
model = model.eval()


def chat(message, history):
    # stream_chat yields partial responses, so Gradio can stream the reply
    # to the UI token by token.
    for response, history in model.stream_chat(
        tokenizer, message, history, max_length=2048, top_p=0.7, temperature=1.0
    ):
        yield response


gr.ChatInterface(
    chat,
    title="ProChat (A Chatbot that's FREE!)",
    description="""
Hi guys! I am a solo developer and I made an app: __ProChat__.
""",
).queue(1).launch()