"addmm_impl_cpu_" not implemented for 'Half'

#8
by you-2 - opened

Inference raises an error:

from transformers import AutoTokenizer, AutoModel
checkpoint = "./chatglm2-6b-int4/"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True, device='cpu')

model = model.eval()

model = model.cpu()
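# The next call fails with: RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'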
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
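PyTorch's CPU backend has no half-precision (float16) implementation of addmm, so weights that are still in fp16 cannot be used for CPU inference. Below is a minimal sketch of the usual workaround, which assumes the checkpoint loads in fp16 by default: cast the weights to float32 with .float() before calling chat. It is an illustrative fix based on general PyTorch behaviour, not a confirmed resolution from this thread.

from transformers import AutoTokenizer, AutoModel

checkpoint = "./chatglm2-6b-int4/"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

# Assumed workaround: cast the weights to float32 so the CPU kernels
# (which lack Half implementations for ops such as addmm) can run.
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).float()
model = model.eval()

response, history = model.chat(tokenizer, "你好", history=[])
print(response)

Running in float32 roughly doubles the memory footprint compared with fp16, so expect slower and heavier CPU inference with this approach.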
