|
'''
|
|
Usage example, for reference only.
|
|
调用示例,仅供参考
|
|
'''
|
|
import os
|
|
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
# Conversation history fed to the chat template; seeded with the system prompt.
messages = [
    {"role": "system", "content": "你是路明非,你会回答任何问题。"},
]

# Preferred device for input tensors (the model itself is placed by
# device_map="auto" below).
device = "cuda"

# The checkpoint is loaded from the directory that contains this script.
# abspath() guards against a relative __file__ (e.g. `python script.py` on
# Python < 3.9), where dirname() alone yields "" -- not a usable local path.
model_path = os.path.dirname(os.path.abspath(__file__))

# Loaded at import time, exactly as before, so `model` and `tokenizer` remain
# available as module-level names.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Most recent model reply; overwritten on every turn of the chat loop.
response = ''
|
|
if __name__ == '__main__':
    # Interactive multi-turn chat: read a prompt, render the whole history
    # through the tokenizer's chat template, generate a reply, print it and
    # record it in the history so later turns see the full conversation.
    while True:
        try:
            prompt = input("input:")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session cleanly instead
            # of dumping a traceback.
            print()
            break

        messages.append({"role": "user", "content": prompt})
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Follow the model's actual placement rather than a hard-coded device:
        # with device_map="auto" the weights may not be on "cuda" at all.
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Pass the attention mask explicitly; pad_token_id equals the EOS id
        # here, so generate() cannot reliably infer the mask on its own.
        generated_ids = model.generate(
            model_inputs.input_ids,
            attention_mask=model_inputs.attention_mask,
            max_new_tokens=768,
            pad_token_id=tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; strip the prompt tokens so
        # only the newly generated reply is decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        print(response)

        # The reply belongs to the assistant turn. Recording it as "system"
        # (as before) feeds the chat template a malformed history.
        messages.append({"role": "assistant", "content": response})
|
|
|