from llama_cpp import Llama
from transformers import AutoTokenizer

gguf_path = "<>"  # placeholder: set this to the local path of the Breeze GGUF file
llm = Llama(model_path=gguf_path, n_ctx=4096)
tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v1_0")

def QA(i):
    messages = [
        {
            "role": "user",
            # "Besides using antivirus software, what other ways can I protect myself from malware?"
            "content": "除了使用防毒軟體,還有哪些方法可以保護自己免受惡意軟體的侵害?",
        }
    ]
    # Render the conversation with the model's chat template; add_generation_prompt=True
    # appends the assistant prefix where the template defines one, so the model
    # continues with an answer instead of another user turn.
    question = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = llm(
        prompt=question,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        presence_penalty=1,
        frequency_penalty=1,
    )
    answer = output["choices"][0]["text"]
    print(f"--- run {i} ---")
    print(answer)

if __name__ == "__main__":
    for i in range(10):
        QA(i)
    print("done")
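If the GGUF's metadata carries a chat template, llama-cpp-python can format the conversation itself via create_chat_completion, which drops the transformers dependency entirely. A minimal sketch, assuming the template is embedded in this GGUF (older llama-cpp-python versions may instead need chat_format set explicitly when constructing Llama):

from llama_cpp import Llama

llm = Llama(model_path="<>", n_ctx=4096)  # same placeholder path as above

# create_chat_completion applies the chat template stored in the GGUF metadata,
# so no external tokenizer is needed to build the prompt string.
output = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "除了使用防毒軟體,還有哪些方法可以保護自己免受惡意軟體的侵害?",
        }
    ],
    max_tokens=1024,
    temperature=0.7,
    top_p=0.9,
)
# Chat completions return an OpenAI-style message object rather than raw text.
print(output["choices"][0]["message"]["content"])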