from lyra_baichuan import lyraBaichuan7B, lyraBaichuan13B

model_path = "./models/Baichuan2-13B-Base-lyra"
tokenizer_path = "./models/Baichuan2-13B-Base-lyra"
inference_dtype = 'fp16'
# Few-shot prompt of the form "poem title -> poet"; the model is expected to
# continue with the author of the second poem.
prompt = "登鹳雀楼->王之涣\n夜雨寄北->"

memopt_mode = 1          # 1 enables the memory-optimized inference mode, 0 disables it
max_output_length = 64   # maximum number of tokens to generate
arch = "Ampere"          # target GPU architecture
cuda_version = 12        # CUDA major version of the runtime

model = lyraBaichuan13B(model_path,
                        tokenizer_path=tokenizer_path,
                        dtype=inference_dtype,
                        memopt_mode=memopt_mode,
                        arch=arch,
                        cuda_version=cuda_version)

bs = 1
prompts = [prompt, ] * bs
output_texts = model.generate(
    prompts, output_length=max_output_length,
    top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)

print(output_texts)
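
# Optional: generate() takes a list of prompts, so batching different inputs is just
# a longer prompts list. A minimal sketch; the second prompt below is an illustrative
# assumption and not part of the original example.
batch_prompts = [
    "登鹳雀楼->王之涣\n夜雨寄北->",
    "静夜思->李白\n望庐山瀑布->",
]
batch_outputs = model.generate(
    batch_prompts, output_length=max_output_length,
    top_k=30, top_p=0.85, temperature=1.0, repetition_penalty=1.0, do_sample=False)
print(batch_outputs)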