bhavyaaiplanet committed on
Commit
e06a18a
1 Parent(s): 6bae5f8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -2
README.md CHANGED
@@ -27,6 +27,17 @@ effi 7b AWQ is a quantized version of effi 7b which is a 7 billion parameter mo
27
  - **License:** Apache 2.0
28
  - **Quantized from model:** Effi-7b
29
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  ### Example of usage
32
 
@@ -62,6 +73,10 @@ outputs = model.generate(input_ids=input_ids, max_new_tokens=512, top_p=0.9,temp
62
 
63
  # Print the result
64
 
65
- print(f"{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(template):].split(' [/INST]')[0]}")
 
 
66
 
67
- ```
 
 
 
27
  - **License:** Apache 2.0
28
  - **Quantized from model:** Effi-7b
29
 
30
+ ### Quantization Configuration
31
+
32
+
33
+ "zero_point": true,
34
+ "q_group_size": 128,
35
+ "w_bit": 4,
36
+ "version": "GEMM",
37
+ "modules_to_not_convert": null
38
+
39
+
40
+
41
 
42
  ### Example of usage
43
 
 
73
 
74
  # Print the result
75
 
76
+ print(f"{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(template):]}")
77
+
78
+ ```
79
 
80
+ ### Framework versions
81
+ - Transformers 4.37.2
82
+ - Autoawq 0.1.8