JustinLin610 committed
Commit 47ec9ba
1 Parent(s): c56a582

Update README.md

Files changed (1)
  1. README.md +16 -8
README.md CHANGED
@@ -48,23 +48,31 @@ Here provides a code snippet with `apply_chat_template` to show you how to load
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 
-model = AutoModelForCausalLM.from_pretrained("Qwen2/Qwen2-beta-14B-Chat", device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(
+    "Qwen2/Qwen2-beta-14B-Chat",
+    device_map="auto"
+)
 tokenizer = AutoTokenizer.from_pretrained("Qwen2/Qwen2-beta-14B-Chat")
 
 prompt = "Give me a short introduction to large language model."
-
 messages = [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": prompt}
 ]
-
-text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
 model_inputs = tokenizer([text], return_tensors="pt").to(device)
 
-generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True)
-
-generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
+generated_ids = model.generate(
+    model_inputs.input_ids,
+    max_new_tokens=512
+)
+generated_ids = [
+    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+]
 
 response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 ```
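
For readability, this is how the README snippet reads after this commit, assembled from the context and `+` lines above. The trailing `print(response)` is added here only to show the decoded output and is not part of the diff.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto

# Load the chat model and tokenizer; device_map="auto" places weights on available devices
model = AutoModelForCausalLM.from_pretrained(
    "Qwen2/Qwen2-beta-14B-Chat",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen2/Qwen2-beta-14B-Chat")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
# Render the chat messages into a single prompt string using the model's chat template
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
# Drop the prompt tokens so only the newly generated continuation is decoded
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
```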