"""Minimal ChatGLM2-6B demo: load the model, ask one question, print the reply."""

from transformers import AutoTokenizer, AutoModel

# Hugging Face Hub identifier of the checkpoint to load.
model_path = "THUDM/chatglm2-6b"

# trust_remote_code=True allows transformers to execute the Python code shipped
# inside the model repository; only use it with repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the weights, cast them to half precision, then call the `quantize(4)`
# method supplied by the model's remote code (4-bit quantization per the
# upstream ChatGLM2 README).
# NOTE(review): the model is never moved to a device here (no `.cuda()` /
# `.to(...)`); confirm that quantization and inference work on the intended
# hardware.
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
model = model.half().quantize(4)

# Switch to evaluation mode before running inference.
model = model.eval()

# Single-turn chat with an empty history. The prompt is Chinese for
# "Where is the capital of the United States?". `chat` returns the reply and
# the updated conversation history.
resp, history = model.chat(tokenizer, "美国的首都是哪里", history=[])
print(resp)