Commit feb68a2
Parent: 51daf1f
Update README.md

README.md CHANGED

````diff
@@ -129,7 +129,13 @@ prompt = [
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-inputs = tokenizer.apply_chat_template(
+inputs = tokenizer.apply_chat_template(
+    prompt,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt",
+    return_dict=True,
+).to("cuda")
 
 model = AutoGPTQForCausalLM.from_pretrained(
     model_id,
@@ -138,7 +144,7 @@ model = AutoGPTQForCausalLM.from_pretrained(
     device_map="auto",
 )
 
-outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
+outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
 
````
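For reference, below is a minimal sketch of how the README snippet reads after this commit. It is assembled from the diff, not copied verbatim from the page: `model_id` and the contents of `prompt` are hypothetical placeholders (the diff only shows the hunk header `prompt = [`), and the `AutoGPTQForCausalLM.from_pretrained(...)` loading call is reproduced exactly as the diff shows it. Note that, depending on the auto_gptq version, loading an already-quantized checkpoint may instead go through `AutoGPTQForCausalLM.from_quantized`.

```python
# Minimal sketch of the post-commit snippet. model_id and the prompt contents
# are hypothetical placeholders; everything else mirrors the diff above.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_id = "org/model-GPTQ"  # placeholder; the README defines its own model_id

# The hunk header ("prompt = [") indicates prompt is a chat-format message list.
prompt = [
    {"role": "user", "content": "Hello, how are you?"},  # example message
]

tokenizer = AutoTokenizer.from_pretrained(model_id)

# With return_dict=True, apply_chat_template returns a BatchEncoding holding
# input_ids and attention_mask; .to("cuda") moves those tensors to the GPU.
inputs = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to("cuda")

# Loading call reproduced as shown in the diff; some auto_gptq versions use
# from_quantized for pre-quantized checkpoints instead.
model = AutoGPTQForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
)

# Because inputs is now a mapping rather than a bare tensor, it must be
# unpacked with ** so generate() receives input_ids and attention_mask as
# keyword arguments; this is the second fix in the commit.
outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```

The two changes go together: once `return_dict=True` makes the tokenizer output a mapping, passing it positionally as the old `model.generate(inputs, ...)` did would hand the whole dict to `generate` as its first argument, so the `**inputs` unpacking is what makes the new tokenizer output usable.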