Spaces: Runtime error
from transformers import AutoTokenizer, AutoModel
import gradio as gr
# chatglm-6b-int4 on CUDA -- ran successfully on the local machine:
# tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()

# chatglm-6b-int4 on CPU. Forward-slash paths resolve on both Windows and Linux;
# the original ".\\models\\..." backslash paths only resolve on Windows, which
# breaks inside a Linux container such as a Hugging Face Space.
tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
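# A minimal device-selection sketch (not in the original): pick CUDA when it is
# available and fall back to CPU otherwise, so the same file runs locally and on
# a Space. Assumes torch is importable, which transformers already requires.
# import torch
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
# model = model.half().cuda() if torch.cuda.is_available() else model.float()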
# chatglm-6b-int4 with an explicitly loaded quantization kernel:
# kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
# tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
# model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
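# Note (not in the original): model_args is never defined in this file, so the
# quantize() line above would raise NameError if uncommented. A concrete bit
# width is needed instead, e.g. (hypothetical value):
# model = model.quantize(bits=4, kernel_file=kernel_file)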
model = model.eval()  # inference mode: disables dropout and other training-only behavior
def chat(msg):
    # History is re-created on every call, so each message is answered
    # without any memory of previous turns.
    history = []
    response, history = model.chat(tokenizer, msg, history=history)
    print("response:", response)
    return response
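# A sketch of a stateful variant (an assumption, not part of the original app):
# Gradio's "state" input/output carries the running history across turns so the
# model sees the whole conversation. chat_with_history is a hypothetical name.
# def chat_with_history(msg, history):
#     history = history or []
#     response, history = model.chat(tokenizer, msg, history=history)
#     return response, history
# iface = gr.Interface(fn=chat_with_history, inputs=["text", "state"], outputs=["text", "state"])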
iface = gr.Interface(fn=chat, inputs="text", outputs="text")
iface.launch()