File size: 1,640 Bytes
48c7a89 f2b7d1a 48c7a89 f2b7d1a 48c7a89 f2b7d1a 48c7a89 f2b7d1a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline
peft_model_id = "philschmid/gemma-7b-dolly-chatml"
# Load Model with PEFT adapter
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
model = AutoPeftModelForCausalLM.from_pretrained(peft_model_id, device_map="auto", torch_dtype=torch.float16)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
eos_token = tokenizer("<|im_end|>",add_special_tokens=False)["input_ids"][0]
print(f"eos_token: {eos_token}")
# run inference
messages = [
{
"role": "user",
"content": "What is the capital of Germany? Explain why thats the case and if it was different in the past?"
}
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, eos_token_id=eos_token)
print(f"prompt:\n {messages[0]['content']}")
print(f"response:\n {outputs[0]['generated_text'][len(prompt):]}")
# run inference
messages = [
{
"role": "user",
"content": "In a town, 60% of the population are adults. Among the adults, 30% have a pet dog and 40% have a pet cat. What percentage of the total population has a pet dog?"
}
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, eos_token_id=eos_token)
print(f"prompt:\n {messages[0]['content']}")
print(f"response:\n {outputs[0]['generated_text'][len(prompt):]}")
|