Moses25 committed on
Commit
264cc0d
1 Parent(s): 4f7be95

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +43 -3
README.md CHANGED
@@ -1,3 +1,43 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ ```python
6
+ from awq import AutoAWQForCausalLM
7
+ from transformers import AutoTokenizer, TextStreamer
8
+
9
+
10
+ quant_path = "Moses25/Llama-3-8B-chat-32K-AWQ"
11
+
12
+ # Load model
13
+ model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
14
+ tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
15
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
16
+
17
+ prompt = "You're standing on the surface of the Earth. "\
18
+ "You walk one mile south, one mile west and one mile north. "\
19
+ "You end up exactly where you started. Where are you?"
20
+
21
+ chat = [
22
+ {"role": "system", "content": "You are a concise assistant that helps answer questions."},
23
+ {"role": "user", "content": prompt},
24
+ ]
25
+
26
+ terminators = [
27
+ tokenizer.eos_token_id,
28
+ tokenizer.convert_tokens_to_ids("<|eot_id|>")
29
+ ]
30
+
31
+ tokens = tokenizer.apply_chat_template(
32
+ chat,
33
+ return_tensors="pt"
34
+ ).cuda()
35
+
36
+ # Generate output
37
+ generation_output = model.generate(
38
+ tokens,
39
+ streamer=streamer,
40
+ max_new_tokens=2048,
41
+ eos_token_id=terminators
42
+ )
43
+ ```