"""Re-export a local ChatGLM checkpoint.

Loads the tokenizer and model from the current directory (``".\\"``),
casts the weights to float32 so the export runs on CPU, and saves both
to the ``models`` directory.  Optional 4-bit quantization is left
disabled in the commented block below.

NOTE(review): the ``".\\"`` paths are Windows-style and assume the
script is run from the checkpoint directory — confirm before reuse.
"""
from transformers import AutoTokenizer, AutoModel

# Compiled CPU quantization kernels — only needed if the quantize step
# below is re-enabled.
KERNEL_FILE = ".\\models\\quantization_kernels.so"


def main() -> None:
    """Load tokenizer and model, then write both under ``models``."""
    # trust_remote_code is required: ChatGLM ships custom modeling code
    # inside the checkpoint directory.
    tokenizer = AutoTokenizer.from_pretrained(".\\", trust_remote_code=True)
    # .float() forces float32 weights (CPU-friendly export).
    model = AutoModel.from_pretrained(".\\", trust_remote_code=True).float()

    # Optional: quantize to 4 bits before saving, then smoke-test a chat turn.
    # model = model.quantize(bits=4, kernel_file=KERNEL_FILE)
    # response, history = model.chat(tokenizer, "你好", history=[])
    # print("response:", response)

    tokenizer.save_pretrained("models")
    model.save_pretrained("models")


if __name__ == "__main__":
    main()