Create inference.py
inference.py  ADDED  +13 -0

@@ -0,0 +1,13 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-bnb-4bit")
+model = AutoModelForCausalLM.from_pretrained(
+    "unsloth/Llama-3.2-3B-bnb-4bit",
+    trust_remote_code=True,
+    load_in_4bit=True,
+    device_map={"": 0}
+)
+input_ids = tokenizer("Hello, how are you?", return_tensors="pt").input_ids
+output = model.generate(input_ids, max_new_tokens=50)
+print(tokenizer.decode(output[0], skip_special_tokens=True))
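One caveat when running the file as committed: device_map={"": 0} places the model on GPU 0 while the tokenized prompt stays on the CPU, which usually fails inside generate() with a device-mismatch error, and the bare load_in_4bit=True kwarg is deprecated in recent transformers releases in favor of BitsAndBytesConfig. The sketch below is one way to run the same flow; it is not part of the commit, and it assumes a CUDA GPU with bitsandbytes installed and a transformers version that accepts quantization_config.

# Hedged sketch, not part of the commit: same model and prompt, with the
# 4-bit flag wrapped in BitsAndBytesConfig and the inputs moved to the
# model's device before generation.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "unsloth/Llama-3.2-3B-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map={"": 0},  # keep the whole model on GPU 0, as in the commit
)

# BatchEncoding.to() moves both input_ids and attention_mask onto the GPU,
# avoiding the CPU/GPU mismatch when calling generate().
inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))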