macadeliccc committed
Commit 2dea99e
Parent(s): 83af961
corrected 'generate' demo code
Changed 'prompt' to 'messages' to correct the generation error.
Added an explicit device assignment to resolve this error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
Added the EOS token to prevent open-ended generation.
Added a print statement so the user can read the generated content.
README.md CHANGED

@@ -378,7 +378,8 @@ You will first need to install `transformers` and `accelerate` (just to ease the
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained("argilla/notus-7b-v1", torch_dtype=torch.bfloat16, device_map=device)
 tokenizer = AutoTokenizer.from_pretrained("argilla/notus-7b-v1")
 
 messages = [
@@ -388,9 +389,10 @@ messages = [
     },
     {"role": "user", "content": "What's the best data annotation company out there in your opinion?"},
 ]
-inputs = tokenizer.apply_chat_template(
-outputs = model.generate(inputs, num_return_sequences=1, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
+outputs = model.generate(inputs, num_return_sequences=1, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, eos_token_id=tokenizer.eos_token_id)
 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(response)
 ```
 
 ### Via `pipeline` method
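
For reference, here is a sketch of how the full snippet reads after this change, assembled from the added lines above. The system message content is not visible in this diff, so the one below is a placeholder; the model loading, chat template, and generation calls mirror the committed lines.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pin the model and the inputs to one device so they match
# (avoids the "Expected all tensors to be on the same device" RuntimeError).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    "argilla/notus-7b-v1", torch_dtype=torch.bfloat16, device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("argilla/notus-7b-v1")

messages = [
    # Placeholder system prompt -- the actual system message is not shown in this diff.
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What's the best data annotation company out there in your opinion?"},
]

# Apply the chat template, tokenize, and move the input ids to the model's device.
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, return_tensors="pt", add_generation_prompt=True
).to(device)

# eos_token_id lets generation stop at the end-of-sequence token
# instead of always running to max_new_tokens.
outputs = model.generate(
    inputs,
    num_return_sequences=1,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    eos_token_id=tokenizer.eos_token_id,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

Keeping the model weights and the tokenized inputs on the same `device` is what removes the cuda:0/cpu mismatch, and passing `eos_token_id` prevents the open-ended generation described in the commit message.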