Update README.md: use the `<human>:` / `<bot>:` chat prompt format in the inference examples
Browse files
README.md
CHANGED
@@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Chat-INCIT
|
|
28 |
model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1", torch_dtype=torch.float16)
|
29 |
model = model.to('cuda:0')
|
30 |
# infer
|
31 |
-
inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
|
32 |
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
|
33 |
output_str = tokenizer.decode(outputs[0])
|
34 |
print(output_str)
|
@@ -44,7 +44,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
44 |
tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1")
|
45 |
model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1", device_map="auto", load_in_8bit=True)
|
46 |
# infer
|
47 |
-
inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
|
48 |
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
|
49 |
output_str = tokenizer.decode(outputs[0])
|
50 |
print(output_str)
|
|
|
28 |
model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1", torch_dtype=torch.float16)
|
29 |
model = model.to('cuda:0')
|
30 |
# infer
|
31 |
+
inputs = tokenizer("<human>: Hello!\n<bot>:", return_tensors='pt').to(model.device)
|
32 |
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
|
33 |
output_str = tokenizer.decode(outputs[0])
|
34 |
print(output_str)
|
|
|
44 |
tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1")
|
45 |
model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Chat-INCITE-2.8B-v1", device_map="auto", load_in_8bit=True)
|
46 |
# infer
|
47 |
+
inputs = tokenizer("<human>: Hello!\n<bot>:", return_tensors='pt').to(model.device)
|
48 |
outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
|
49 |
output_str = tokenizer.decode(outputs[0])
|
50 |
print(output_str)
|