Xenova committed
Commit 241b86d
1 Parent(s): a6df051
Files changed (1)
  1. README.md +2 -5
README.md CHANGED
@@ -102,18 +102,15 @@ prompt = [
 ]
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt").cuda()
-
 model = AutoAWQForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
     device_map="auto",
-    fuse_layers=True,
 )
 
-outputs = model.generate(inputs, do_sample=True, max_new_tokens=256)
+inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to('cuda')
+outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
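For context, a minimal sketch of how the README's usage example reads after this commit. The imports, `model_id`, and the full `prompt` list sit above this hunk in the README, so the values below are hypothetical placeholders. The substantive change is that `apply_chat_template(..., return_dict=True)` now returns both `input_ids` and `attention_mask`, which are unpacked into `generate()` via `**inputs`; the patch also moves tokenization after model loading and drops `fuse_layers=True` from the `from_pretrained` call.

```python
# Sketch of the patched example; imports, model_id, and prompt are defined
# earlier in the README (above this hunk), shown here as placeholders.
import torch
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_id = "path/to/awq-model"              # placeholder; set earlier in the README
prompt = [
    {"role": "user", "content": "Hello!"},  # placeholder chat message
]

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoAWQForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

# return_dict=True yields a BatchEncoding with input_ids and attention_mask,
# so generate() receives both via **inputs instead of a bare tensor.
inputs = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to("cuda")
outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```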