File size: 500 Bytes
91fb57d
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the pre-quantized 4-bit Llama 3.2 3B checkpoint and run a short
# greedy generation as a smoke test.
tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-bnb-4bit")
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-3B-bnb-4bit",
    trust_remote_code=True,
    # NOTE(review): `load_in_4bit=True` is deprecated in recent transformers
    # releases in favor of `quantization_config=BitsAndBytesConfig(...)`;
    # kept as-is because this checkpoint is already bnb-4bit quantized.
    load_in_4bit=True,
    device_map={"": 0},  # place the whole model on cuda:0
)
# Tokenize and move the tensors to the model's device: with device_map the
# weights live on cuda:0, but the tokenizer returns CPU tensors — passing
# them directly to generate() raises a device-mismatch error.
inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
# Unpack the full encoding so attention_mask is passed alongside input_ids;
# omitting it triggers a warning and can degrade generation with padding.
output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))