Text Generation
Transformers
PyTorch
Chinese
English
llama
text-generation-inference
fireballoon committed on
Commit
9cbd412
1 Parent(s): b5998e0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -36,6 +36,7 @@ Inference with Transformers:
36
  >>> from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
37
  >>> tokenizer = AutoTokenizer.from_pretrained("fireballoon/baichuan-vicuna-7b", use_fast=False)
38
  >>> model = AutoModelForCausalLM.from_pretrained("fireballoon/baichuan-vicuna-7b").half().cuda()
 
39
  >>> instruction = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
40
  >>> prompt = instruction.format("five tips to help with sleep") # user message
41
  >>> generate_ids = model.generate(tokenizer(prompt, return_tensors='pt').input_ids.cuda(), max_new_tokens=2048, streamer=streamer)
 
36
  >>> from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
37
  >>> tokenizer = AutoTokenizer.from_pretrained("fireballoon/baichuan-vicuna-7b", use_fast=False)
38
  >>> model = AutoModelForCausalLM.from_pretrained("fireballoon/baichuan-vicuna-7b").half().cuda()
39
+ >>> streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
40
  >>> instruction = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
41
  >>> prompt = instruction.format("five tips to help with sleep") # user message
42
  >>> generate_ids = model.generate(tokenizer(prompt, return_tensors='pt').input_ids.cuda(), max_new_tokens=2048, streamer=streamer)