JustinLin610 committed on
Commit
ab0b543
1 Parent(s): cffcc20

Update README.md

Files changed (1)
  1. README.md +16 -8
README.md CHANGED
@@ -46,23 +46,31 @@ Here we provide a code snippet with `apply_chat_template` to show you how to load
  from transformers import AutoModelForCausalLM, AutoTokenizer
  device = "cuda" # the device to load the model onto

- model = AutoModelForCausalLM.from_pretrained("Qwen2/Qwen2-beta-0_5B-Chat", device_map="auto")
+ model = AutoModelForCausalLM.from_pretrained(
+     "Qwen2/Qwen2-beta-0_5B-Chat",
+     device_map="auto"
+ )
  tokenizer = AutoTokenizer.from_pretrained("Qwen2/Qwen2-beta-0_5B-Chat")

  prompt = "Give me a short introduction to large language model."
-
  messages = [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": prompt}
  ]
-
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
+ text = tokenizer.apply_chat_template(
+     messages,
+     tokenize=False,
+     add_generation_prompt=True
+ )
  model_inputs = tokenizer([text], return_tensors="pt").to(device)

- generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True)
-
- generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
+ generated_ids = model.generate(
+     model_inputs.input_ids,
+     max_new_tokens=512
+ )
+ generated_ids = [
+     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+ ]

  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
  ```
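
As a quick sanity check on the updated snippet, the sketch below shows how the formatted prompt and the decoded answer might be inspected after running it. The `print` calls and the ChatML-style template shown in the comments are illustrative assumptions, not part of this commit.

```python
# Minimal sketch, assuming the updated README snippet above has already run
# and `text` / `response` are in scope.

# `text` is the prompt produced by apply_chat_template(add_generation_prompt=True);
# for Qwen2-beta chat models it is expected to look roughly ChatML-style:
#   <|im_start|>system
#   You are a helpful assistant.<|im_end|>
#   <|im_start|>user
#   Give me a short introduction to large language model.<|im_end|>
#   <|im_start|>assistant
print(text)

# `response` holds only the newly generated tokens, because the list
# comprehension in the snippet slices off the prompt ids before decoding.
print(response)
```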