plabadens committed on
Commit
34d2958
0 Parent(s):

Initial model conversion

.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
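These attribute rules tell Git LFS to store large binary artifacts (checkpoints, tokenizer models, archives) as pointer files instead of committing the bytes directly. A minimal sketch of how the patterns apply to the files added in this commit, using Python's fnmatch as a rough stand-in for gitattributes glob matching (not an exact reimplementation of Git's rules):

```python
from fnmatch import fnmatch

# A subset of the patterns from the .gitattributes above.
lfs_patterns = ["*.safetensors", "*.model", "*.bin", "*.pt", "*tfevents*"]

files = [
    "manticore-13b-4bit-128g.safetensors",  # ~7.3 GB quantized weights -> LFS
    "tokenizer.model",                      # SentencePiece model -> LFS
    "config.json",                          # small text file -> regular git
]

for name in files:
    tracked = any(fnmatch(name, pat) for pat in lfs_patterns)
    print(f"{name}: {'LFS' if tracked else 'regular git'}")
```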
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "huggyllama/llama-13b",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 2048,
+   "max_sequence_length": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 40,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.30.0.dev0",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
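The config keeps the unmodified huggyllama/llama-13b geometry: 40 layers, 40 attention heads, hidden size 5120, untied embeddings. A quick sanity check (a sketch, not part of the conversion) that these dimensions really add up to roughly 13B parameters, ignoring the small RMSNorm weights:

```python
# Values copied from the config.json above.
hidden = 5120
intermediate = 13824
n_layers = 40
vocab = 32000

embed = vocab * hidden              # token embedding matrix
attn = 4 * hidden * hidden          # q/k/v/o projections per layer
mlp = 3 * hidden * intermediate     # gate/up/down projections per layer
lm_head = vocab * hidden            # tie_word_embeddings is false, so counted separately

total = embed + n_layers * (attn + mlp) + lm_head
print(f"~{total / 1e9:.1f}B parameters")  # ~13.0B
```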
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.0.dev0"
+ }
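These generation defaults only pin the special-token ids used by generate(). A minimal sketch for inspecting them once the repo is published (the repo id below is an assumption; any local directory containing the files works too):

```python
from transformers import GenerationConfig

# Hypothetical repo id for this model conversion.
gen_cfg = GenerationConfig.from_pretrained("plabadens/manticore-13b-4bit-128g")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 1 2 0
```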
manticore-13b-4bit-128g.log ADDED
@@ -0,0 +1,849 @@
1
+
2
+ Found cached dataset json (~/.cache/huggingface/datasets/allenai___json/allenai--c4-6fbe877195f42de5/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)
3
+ Found cached dataset json (~/.cache/huggingface/datasets/allenai___json/allenai--c4-efc3d4f4606f44bd/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)
4
+ Token indices sequence length is longer than the specified maximum sequence length for this model (3908 > 2048). Running this sequence through the model will result in indexing errors
5
+ Starting ...
6
+ Ready.
7
+ Quantizing layer 1/40..
8
+ +------------------+--------------+------------+-----------+-------+
9
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
10
+ +==================+==============+============+===========+=======+
11
+ | self_attn.k_proj | 699.085 | - | - | 2.969 |
12
+ | self_attn.v_proj | 20.162 | - | - | 1.529 |
13
+ | self_attn.q_proj | 663.384 | - | - | 1.536 |
14
+ | self_attn.o_proj | 2.897 | - | - | 2.057 |
15
+ | mlp.up_proj | 126.477 | - | - | 2.155 |
16
+ | mlp.gate_proj | 136.255 | - | - | 1.492 |
17
+ | mlp.down_proj | 7.972 | - | - | 5.673 |
18
+ +------------------+--------------+------------+-----------+-------+
19
+
20
+
21
+ Quantizing layer 2/40..
22
+ +------------------+--------------+------------+-----------+-------+
23
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
24
+ +==================+==============+============+===========+=======+
25
+ | self_attn.k_proj | 833.716 | - | - | 2.145 |
26
+ | self_attn.v_proj | 84.632 | - | - | 1.437 |
27
+ | self_attn.q_proj | 792.299 | - | - | 1.425 |
28
+ | self_attn.o_proj | 17.699 | - | - | 2.037 |
29
+ | mlp.up_proj | 866.981 | - | - | 2.223 |
30
+ | mlp.gate_proj | 984.281 | - | - | 1.536 |
31
+ | mlp.down_proj | 69.406 | - | - | 6.099 |
32
+ +------------------+--------------+------------+-----------+-------+
33
+
34
+
35
+ Quantizing layer 3/40..
36
+ +------------------+--------------+------------+-----------+-------+
37
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
38
+ +==================+==============+============+===========+=======+
39
+ | self_attn.k_proj | 2007.076 | - | - | 2.197 |
40
+ | self_attn.v_proj | 316.141 | - | - | 1.417 |
41
+ | self_attn.q_proj | 1877.389 | - | - | 1.552 |
42
+ | self_attn.o_proj | 48.965 | - | - | 2.256 |
43
+ | mlp.up_proj | 2437.944 | - | - | 2.213 |
44
+ | mlp.gate_proj | 2980.109 | - | - | 1.556 |
45
+ | mlp.down_proj | 323.876 | - | - | 6.116 |
46
+ +------------------+--------------+------------+-----------+-------+
47
+
48
+
49
+ Quantizing layer 4/40..
50
+ +------------------+--------------+------------+-----------+-------+
51
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
52
+ +==================+==============+============+===========+=======+
53
+ | self_attn.k_proj | 7247.779 | - | - | 2.183 |
54
+ | self_attn.v_proj | 2281.976 | - | - | 1.459 |
55
+ | self_attn.q_proj | 7044.623 | - | - | 1.492 |
56
+ | self_attn.o_proj | 95.820 | - | - | 2.235 |
57
+ | mlp.up_proj | 4585.885 | - | - | 2.378 |
58
+ | mlp.gate_proj | 5477.426 | - | - | 1.624 |
59
+ | mlp.down_proj | 336.858 | - | - | 6.299 |
60
+ +------------------+--------------+------------+-----------+-------+
61
+
62
+
63
+ Quantizing layer 5/40..
64
+ +------------------+--------------+------------+-----------+-------+
65
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
66
+ +==================+==============+============+===========+=======+
67
+ | self_attn.k_proj | 9244.811 | - | - | 2.210 |
68
+ | self_attn.v_proj | 3243.931 | - | - | 1.544 |
69
+ | self_attn.q_proj | 9085.997 | - | - | 1.505 |
70
+ | self_attn.o_proj | 135.217 | - | - | 2.111 |
71
+ | mlp.up_proj | 6404.132 | - | - | 2.189 |
72
+ | mlp.gate_proj | 7805.458 | - | - | 1.572 |
73
+ | mlp.down_proj | 558.483 | - | - | 5.889 |
74
+ +------------------+--------------+------------+-----------+-------+
75
+
76
+
77
+ Quantizing layer 6/40..
78
+ +------------------+--------------+------------+-----------+-------+
79
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
80
+ +==================+==============+============+===========+=======+
81
+ | self_attn.k_proj | 12590.303 | - | - | 2.208 |
82
+ | self_attn.v_proj | 4789.499 | - | - | 1.485 |
83
+ | self_attn.q_proj | 12458.196 | - | - | 1.677 |
84
+ | self_attn.o_proj | 185.082 | - | - | 2.269 |
85
+ | mlp.up_proj | 8282.830 | - | - | 2.188 |
86
+ | mlp.gate_proj | 9948.463 | - | - | 1.533 |
87
+ | mlp.down_proj | 799.254 | - | - | 5.824 |
88
+ +------------------+--------------+------------+-----------+-------+
89
+
90
+
91
+ Quantizing layer 7/40..
92
+ +------------------+--------------+------------+-----------+-------+
93
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
94
+ +==================+==============+============+===========+=======+
95
+ | self_attn.k_proj | 12815.720 | - | - | 2.281 |
96
+ | self_attn.v_proj | 4819.657 | - | - | 1.550 |
97
+ | self_attn.q_proj | 12782.796 | - | - | 1.601 |
98
+ | self_attn.o_proj | 443.461 | - | - | 2.010 |
99
+ | mlp.up_proj | 9536.821 | - | - | 2.267 |
100
+ | mlp.gate_proj | 11615.631 | - | - | 1.582 |
101
+ | mlp.down_proj | 1365.899 | - | - | 5.818 |
102
+ +------------------+--------------+------------+-----------+-------+
103
+
104
+
105
+ Quantizing layer 8/40..
106
+ +------------------+--------------+------------+-----------+-------+
107
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
108
+ +==================+==============+============+===========+=======+
109
+ | self_attn.k_proj | 14306.873 | - | - | 2.319 |
110
+ | self_attn.v_proj | 6024.040 | - | - | 1.529 |
111
+ | self_attn.q_proj | 14025.603 | - | - | 1.475 |
112
+ | self_attn.o_proj | 533.999 | - | - | 2.126 |
113
+ | mlp.up_proj | 11076.211 | - | - | 2.303 |
114
+ | mlp.gate_proj | 12810.490 | - | - | 1.554 |
115
+ | mlp.down_proj | 1382.088 | - | - | 5.916 |
116
+ +------------------+--------------+------------+-----------+-------+
117
+
118
+
119
+ Quantizing layer 9/40..
120
+ +------------------+--------------+------------+-----------+-------+
121
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
122
+ +==================+==============+============+===========+=======+
123
+ | self_attn.k_proj | 18277.871 | - | - | 2.216 |
124
+ | self_attn.v_proj | 8258.400 | - | - | 1.561 |
125
+ | self_attn.q_proj | 18689.828 | - | - | 1.627 |
126
+ | self_attn.o_proj | 638.868 | - | - | 2.238 |
127
+ | mlp.up_proj | 12678.387 | - | - | 2.339 |
128
+ | mlp.gate_proj | 14003.211 | - | - | 1.568 |
129
+ | mlp.down_proj | 1604.783 | - | - | 5.831 |
130
+ +------------------+--------------+------------+-----------+-------+
131
+
132
+
133
+ Quantizing layer 10/40..
134
+ +------------------+--------------+------------+-----------+-------+
135
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
136
+ +==================+==============+============+===========+=======+
137
+ | self_attn.k_proj | 17910.699 | - | - | 2.110 |
138
+ | self_attn.v_proj | 7896.578 | - | - | 1.476 |
139
+ | self_attn.q_proj | 17414.641 | - | - | 1.455 |
140
+ | self_attn.o_proj | 821.772 | - | - | 2.139 |
141
+ | mlp.up_proj | 14038.678 | - | - | 2.091 |
142
+ | mlp.gate_proj | 15241.850 | - | - | 1.474 |
143
+ | mlp.down_proj | 1920.510 | - | - | 6.219 |
144
+ +------------------+--------------+------------+-----------+-------+
145
+
146
+
147
+ Quantizing layer 11/40..
148
+ +------------------+--------------+------------+-----------+-------+
149
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
150
+ +==================+==============+============+===========+=======+
151
+ | self_attn.k_proj | 18869.732 | - | - | 2.180 |
152
+ | self_attn.v_proj | 9237.131 | - | - | 1.484 |
153
+ | self_attn.q_proj | 18370.957 | - | - | 1.477 |
154
+ | self_attn.o_proj | 969.532 | - | - | 2.147 |
155
+ | mlp.up_proj | 15312.948 | - | - | 2.398 |
156
+ | mlp.gate_proj | 16015.623 | - | - | 1.711 |
157
+ | mlp.down_proj | 2238.236 | - | - | 5.736 |
158
+ +------------------+--------------+------------+-----------+-------+
159
+
160
+
161
+ Quantizing layer 12/40..
162
+ +------------------+--------------+------------+-----------+-------+
163
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
164
+ +==================+==============+============+===========+=======+
165
+ | self_attn.k_proj | 20102.830 | - | - | 2.140 |
166
+ | self_attn.v_proj | 9838.231 | - | - | 1.473 |
167
+ | self_attn.q_proj | 19395.590 | - | - | 1.452 |
168
+ | self_attn.o_proj | 1331.936 | - | - | 2.024 |
169
+ | mlp.up_proj | 16841.980 | - | - | 2.165 |
170
+ | mlp.gate_proj | 17118.094 | - | - | 1.480 |
171
+ | mlp.down_proj | 2681.943 | - | - | 5.950 |
172
+ +------------------+--------------+------------+-----------+-------+
173
+
174
+
175
+ Quantizing layer 13/40..
176
+ +------------------+--------------+------------+-----------+-------+
177
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
178
+ +==================+==============+============+===========+=======+
179
+ | self_attn.k_proj | 20447.057 | - | - | 2.293 |
180
+ | self_attn.v_proj | 10192.447 | - | - | 1.541 |
181
+ | self_attn.q_proj | 19843.822 | - | - | 1.540 |
182
+ | self_attn.o_proj | 1744.482 | - | - | 2.103 |
183
+ | mlp.up_proj | 17869.541 | - | - | 2.092 |
184
+ | mlp.gate_proj | 17824.297 | - | - | 1.521 |
185
+ | mlp.down_proj | 3193.839 | - | - | 6.185 |
186
+ +------------------+--------------+------------+-----------+-------+
187
+
188
+
189
+ Quantizing layer 14/40..
190
+ +------------------+--------------+------------+-----------+-------+
191
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
192
+ +==================+==============+============+===========+=======+
193
+ | self_attn.k_proj | 22471.352 | - | - | 2.283 |
194
+ | self_attn.v_proj | 13197.844 | - | - | 1.526 |
195
+ | self_attn.q_proj | 21965.322 | - | - | 1.511 |
196
+ | self_attn.o_proj | 1889.137 | - | - | 2.187 |
197
+ | mlp.up_proj | 20171.234 | - | - | 2.470 |
198
+ | mlp.gate_proj | 20078.652 | - | - | 1.753 |
199
+ | mlp.down_proj | 3986.132 | - | - | 5.910 |
200
+ +------------------+--------------+------------+-----------+-------+
201
+
202
+
203
+ Quantizing layer 15/40..
204
+ +------------------+--------------+------------+-----------+-------+
205
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
206
+ +==================+==============+============+===========+=======+
207
+ | self_attn.k_proj | 21548.637 | - | - | 2.269 |
208
+ | self_attn.v_proj | 13209.446 | - | - | 1.533 |
209
+ | self_attn.q_proj | 20824.828 | - | - | 1.576 |
210
+ | self_attn.o_proj | 1764.429 | - | - | 2.162 |
211
+ | mlp.up_proj | 22525.393 | - | - | 2.246 |
212
+ | mlp.gate_proj | 22575.895 | - | - | 1.552 |
213
+ | mlp.down_proj | 4570.991 | - | - | 5.823 |
214
+ +------------------+--------------+------------+-----------+-------+
215
+
216
+
217
+ Quantizing layer 16/40..
218
+ +------------------+--------------+------------+-----------+-------+
219
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
220
+ +==================+==============+============+===========+=======+
221
+ | self_attn.k_proj | 24878.395 | - | - | 2.239 |
222
+ | self_attn.v_proj | 15705.982 | - | - | 1.527 |
223
+ | self_attn.q_proj | 24505.125 | - | - | 1.517 |
224
+ | self_attn.o_proj | 1836.744 | - | - | 2.169 |
225
+ | mlp.up_proj | 24725.775 | - | - | 2.263 |
226
+ | mlp.gate_proj | 24939.625 | - | - | 1.629 |
227
+ | mlp.down_proj | 5293.017 | - | - | 6.001 |
228
+ +------------------+--------------+------------+-----------+-------+
229
+
230
+
231
+ Quantizing layer 17/40..
232
+ +------------------+--------------+------------+-----------+-------+
233
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
234
+ +==================+==============+============+===========+=======+
235
+ | self_attn.k_proj | 24504.688 | - | - | 2.240 |
236
+ | self_attn.v_proj | 16056.951 | - | - | 1.547 |
237
+ | self_attn.q_proj | 23832.211 | - | - | 1.547 |
238
+ | self_attn.o_proj | 1557.532 | - | - | 2.148 |
239
+ | mlp.up_proj | 27068.410 | - | - | 2.245 |
240
+ | mlp.gate_proj | 27383.363 | - | - | 1.551 |
241
+ | mlp.down_proj | 5846.998 | - | - | 5.885 |
242
+ +------------------+--------------+------------+-----------+-------+
243
+
244
+
245
+ Quantizing layer 18/40..
246
+ +------------------+--------------+------------+-----------+-------+
247
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
248
+ +==================+==============+============+===========+=======+
249
+ | self_attn.k_proj | 23499.668 | - | - | 2.245 |
250
+ | self_attn.v_proj | 14901.561 | - | - | 1.506 |
251
+ | self_attn.q_proj | 22453.777 | - | - | 1.531 |
252
+ | self_attn.o_proj | 1982.065 | - | - | 2.138 |
253
+ | mlp.up_proj | 28787.129 | - | - | 2.233 |
254
+ | mlp.gate_proj | 29317.820 | - | - | 1.525 |
255
+ | mlp.down_proj | 6634.873 | - | - | 5.954 |
256
+ +------------------+--------------+------------+-----------+-------+
257
+
258
+
259
+ Quantizing layer 19/40..
260
+ +------------------+--------------+------------+-----------+-------+
261
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
262
+ +==================+==============+============+===========+=======+
263
+ | self_attn.k_proj | 25263.570 | - | - | 2.261 |
264
+ | self_attn.v_proj | 17757.848 | - | - | 1.533 |
265
+ | self_attn.q_proj | 24840.973 | - | - | 1.529 |
266
+ | self_attn.o_proj | 2037.134 | - | - | 2.174 |
267
+ | mlp.up_proj | 30709.844 | - | - | 2.243 |
268
+ | mlp.gate_proj | 31749.461 | - | - | 1.566 |
269
+ | mlp.down_proj | 7825.893 | - | - | 5.977 |
270
+ +------------------+--------------+------------+-----------+-------+
271
+
272
+
273
+ Quantizing layer 20/40..
274
+ +------------------+--------------+------------+-----------+-------+
275
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
276
+ +==================+==============+============+===========+=======+
277
+ | self_attn.k_proj | 25034.102 | - | - | 2.202 |
278
+ | self_attn.v_proj | 18874.211 | - | - | 1.487 |
279
+ | self_attn.q_proj | 24577.367 | - | - | 1.486 |
280
+ | self_attn.o_proj | 2441.195 | - | - | 2.198 |
281
+ | mlp.up_proj | 33281.070 | - | - | 2.217 |
282
+ | mlp.gate_proj | 34847.070 | - | - | 1.547 |
283
+ | mlp.down_proj | 9326.249 | - | - | 5.867 |
284
+ +------------------+--------------+------------+-----------+-------+
285
+
286
+
287
+ Quantizing layer 21/40..
288
+ +------------------+--------------+------------+-----------+-------+
289
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
290
+ +==================+==============+============+===========+=======+
291
+ | self_attn.k_proj | 24243.559 | - | - | 2.254 |
292
+ | self_attn.v_proj | 18996.426 | - | - | 1.544 |
293
+ | self_attn.q_proj | 24238.959 | - | - | 1.530 |
294
+ | self_attn.o_proj | 2421.684 | - | - | 2.162 |
295
+ | mlp.up_proj | 35619.750 | - | - | 2.249 |
296
+ | mlp.gate_proj | 37851.703 | - | - | 1.558 |
297
+ | mlp.down_proj | 10490.234 | - | - | 5.957 |
298
+ +------------------+--------------+------------+-----------+-------+
299
+
300
+
301
+ Quantizing layer 22/40..
302
+ +------------------+--------------+------------+-----------+-------+
303
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
304
+ +==================+==============+============+===========+=======+
305
+ | self_attn.k_proj | 24204.869 | - | - | 2.233 |
306
+ | self_attn.v_proj | 20761.924 | - | - | 1.548 |
307
+ | self_attn.q_proj | 24269.518 | - | - | 1.488 |
308
+ | self_attn.o_proj | 2956.973 | - | - | 2.183 |
309
+ | mlp.up_proj | 37259.047 | - | - | 2.210 |
310
+ | mlp.gate_proj | 39854.340 | - | - | 1.527 |
311
+ | mlp.down_proj | 11912.477 | - | - | 5.834 |
312
+ +------------------+--------------+------------+-----------+-------+
313
+
314
+
315
+ Quantizing layer 23/40..
316
+ +------------------+--------------+------------+-----------+-------+
317
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
318
+ +==================+==============+============+===========+=======+
319
+ | self_attn.k_proj | 23936.551 | - | - | 2.221 |
320
+ | self_attn.v_proj | 20511.947 | - | - | 1.475 |
321
+ | self_attn.q_proj | 23747.645 | - | - | 1.482 |
322
+ | self_attn.o_proj | 2550.344 | - | - | 2.146 |
323
+ | mlp.up_proj | 38672.172 | - | - | 2.227 |
324
+ | mlp.gate_proj | 42040.203 | - | - | 1.510 |
325
+ | mlp.down_proj | 13073.925 | - | - | 5.963 |
326
+ +------------------+--------------+------------+-----------+-------+
327
+
328
+
329
+ Quantizing layer 24/40..
330
+ +------------------+--------------+------------+-----------+-------+
331
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
332
+ +==================+==============+============+===========+=======+
333
+ | self_attn.k_proj | 26160.334 | - | - | 2.235 |
334
+ | self_attn.v_proj | 24639.689 | - | - | 1.486 |
335
+ | self_attn.q_proj | 26502.482 | - | - | 1.483 |
336
+ | self_attn.o_proj | 2817.627 | - | - | 2.133 |
337
+ | mlp.up_proj | 40697.367 | - | - | 2.237 |
338
+ | mlp.gate_proj | 44920.602 | - | - | 1.545 |
339
+ | mlp.down_proj | 14389.604 | - | - | 5.868 |
340
+ +------------------+--------------+------------+-----------+-------+
341
+
342
+
343
+ Quantizing layer 25/40..
344
+ +------------------+--------------+------------+-----------+-------+
345
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
346
+ +==================+==============+============+===========+=======+
347
+ | self_attn.k_proj | 25879.900 | - | - | 2.257 |
348
+ | self_attn.v_proj | 24740.775 | - | - | 1.511 |
349
+ | self_attn.q_proj | 26419.555 | - | - | 1.506 |
350
+ | self_attn.o_proj | 2836.316 | - | - | 2.129 |
351
+ | mlp.up_proj | 42787.164 | - | - | 2.210 |
352
+ | mlp.gate_proj | 47598.883 | - | - | 1.560 |
353
+ | mlp.down_proj | 15096.459 | - | - | 5.970 |
354
+ +------------------+--------------+------------+-----------+-------+
355
+
356
+
357
+ Quantizing layer 26/40..
358
+ +------------------+--------------+------------+-----------+-------+
359
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
360
+ +==================+==============+============+===========+=======+
361
+ | self_attn.k_proj | 25009.330 | - | - | 2.197 |
362
+ | self_attn.v_proj | 25032.744 | - | - | 1.477 |
363
+ | self_attn.q_proj | 25241.531 | - | - | 1.479 |
364
+ | self_attn.o_proj | 2970.640 | - | - | 2.127 |
365
+ | mlp.up_proj | 44925.082 | - | - | 2.202 |
366
+ | mlp.gate_proj | 50018.508 | - | - | 1.564 |
367
+ | mlp.down_proj | 16100.578 | - | - | 5.851 |
368
+ +------------------+--------------+------------+-----------+-------+
369
+
370
+
371
+ Quantizing layer 27/40..
372
+ +------------------+--------------+------------+-----------+-------+
373
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
374
+ +==================+==============+============+===========+=======+
375
+ | self_attn.k_proj | 28136.842 | - | - | 2.203 |
376
+ | self_attn.v_proj | 28984.514 | - | - | 1.529 |
377
+ | self_attn.q_proj | 28480.695 | - | - | 1.521 |
378
+ | self_attn.o_proj | 2583.247 | - | - | 2.163 |
379
+ | mlp.up_proj | 47577.098 | - | - | 2.224 |
380
+ | mlp.gate_proj | 52985.324 | - | - | 1.524 |
381
+ | mlp.down_proj | 16923.477 | - | - | 5.920 |
382
+ +------------------+--------------+------------+-----------+-------+
383
+
384
+
385
+ Quantizing layer 28/40..
386
+ +------------------+--------------+------------+-----------+-------+
387
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
388
+ +==================+==============+============+===========+=======+
389
+ | self_attn.k_proj | 27372.766 | - | - | 2.218 |
390
+ | self_attn.v_proj | 29232.785 | - | - | 1.498 |
391
+ | self_attn.q_proj | 27755.496 | - | - | 1.516 |
392
+ | self_attn.o_proj | 2929.728 | - | - | 2.153 |
393
+ | mlp.up_proj | 49762.422 | - | - | 2.210 |
394
+ | mlp.gate_proj | 55543.320 | - | - | 1.547 |
395
+ | mlp.down_proj | 17614.914 | - | - | 5.478 |
396
+ +------------------+--------------+------------+-----------+-------+
397
+
398
+
399
+ Quantizing layer 29/40..
400
+ +------------------+--------------+------------+-----------+-------+
401
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
402
+ +==================+==============+============+===========+=======+
403
+ | self_attn.k_proj | 28571.195 | - | - | 2.112 |
404
+ | self_attn.v_proj | 28252.555 | - | - | 1.441 |
405
+ | self_attn.q_proj | 29134.234 | - | - | 1.584 |
406
+ | self_attn.o_proj | 2582.378 | - | - | 2.154 |
407
+ | mlp.up_proj | 51553.789 | - | - | 2.268 |
408
+ | mlp.gate_proj | 57860.793 | - | - | 1.526 |
409
+ | mlp.down_proj | 18117.818 | - | - | 5.944 |
410
+ +------------------+--------------+------------+-----------+-------+
411
+
412
+
413
+ Quantizing layer 30/40..
414
+ +------------------+--------------+------------+-----------+-------+
415
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
416
+ +==================+==============+============+===========+=======+
417
+ | self_attn.k_proj | 28818.846 | - | - | 2.235 |
418
+ | self_attn.v_proj | 29999.676 | - | - | 1.507 |
419
+ | self_attn.q_proj | 29018.770 | - | - | 1.513 |
420
+ | self_attn.o_proj | 2406.745 | - | - | 2.136 |
421
+ | mlp.up_proj | 54442.094 | - | - | 2.222 |
422
+ | mlp.gate_proj | 60799.031 | - | - | 1.561 |
423
+ | mlp.down_proj | 18606.734 | - | - | 5.950 |
424
+ +------------------+--------------+------------+-----------+-------+
425
+
426
+
427
+ Quantizing layer 31/40..
428
+ +------------------+--------------+------------+-----------+-------+
429
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
430
+ +==================+==============+============+===========+=======+
431
+ | self_attn.k_proj | 31552.672 | - | - | 2.238 |
432
+ | self_attn.v_proj | 32085.520 | - | - | 1.517 |
433
+ | self_attn.q_proj | 31604.691 | - | - | 1.512 |
434
+ | self_attn.o_proj | 2696.787 | - | - | 2.193 |
435
+ | mlp.up_proj | 57001.633 | - | - | 2.223 |
436
+ | mlp.gate_proj | 63375.883 | - | - | 1.523 |
437
+ | mlp.down_proj | 19194.986 | - | - | 5.921 |
438
+ +------------------+--------------+------------+-----------+-------+
439
+
440
+
441
+ Quantizing layer 32/40..
442
+ +------------------+--------------+------------+-----------+-------+
443
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
444
+ +==================+==============+============+===========+=======+
445
+ | self_attn.k_proj | 30807.047 | - | - | 2.214 |
446
+ | self_attn.v_proj | 33207.539 | - | - | 1.545 |
447
+ | self_attn.q_proj | 30847.758 | - | - | 1.510 |
448
+ | self_attn.o_proj | 2343.647 | - | - | 2.143 |
449
+ | mlp.up_proj | 59283.980 | - | - | 2.254 |
450
+ | mlp.gate_proj | 65641.500 | - | - | 1.526 |
451
+ | mlp.down_proj | 20043.617 | - | - | 5.885 |
452
+ +------------------+--------------+------------+-----------+-------+
453
+
454
+
455
+ Quantizing layer 33/40..
456
+ +------------------+--------------+------------+-----------+-------+
457
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
458
+ +==================+==============+============+===========+=======+
459
+ | self_attn.k_proj | 32050.086 | - | - | 2.220 |
460
+ | self_attn.v_proj | 35367.578 | - | - | 1.524 |
461
+ | self_attn.q_proj | 32181.414 | - | - | 1.484 |
462
+ | self_attn.o_proj | 1997.855 | - | - | 2.133 |
463
+ | mlp.up_proj | 62253.375 | - | - | 2.259 |
464
+ | mlp.gate_proj | 67897.727 | - | - | 1.522 |
465
+ | mlp.down_proj | 20735.859 | - | - | 5.862 |
466
+ +------------------+--------------+------------+-----------+-------+
467
+
468
+
469
+ Quantizing layer 34/40..
470
+ +------------------+--------------+------------+-----------+-------+
471
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
472
+ +==================+==============+============+===========+=======+
473
+ | self_attn.k_proj | 34280.840 | - | - | 2.217 |
474
+ | self_attn.v_proj | 37861.223 | - | - | 1.493 |
475
+ | self_attn.q_proj | 34754.984 | - | - | 1.570 |
476
+ | self_attn.o_proj | 2229.951 | - | - | 2.141 |
477
+ | mlp.up_proj | 64770.355 | - | - | 2.228 |
478
+ | mlp.gate_proj | 69814.594 | - | - | 1.562 |
479
+ | mlp.down_proj | 22216.566 | - | - | 5.922 |
480
+ +------------------+--------------+------------+-----------+-------+
481
+
482
+
483
+ Quantizing layer 35/40..
484
+ +------------------+--------------+------------+-----------+-------+
485
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
486
+ +==================+==============+============+===========+=======+
487
+ | self_attn.k_proj | 30260.629 | - | - | 2.191 |
488
+ | self_attn.v_proj | 33725.844 | - | - | 1.476 |
489
+ | self_attn.q_proj | 30395.316 | - | - | 1.472 |
490
+ | self_attn.o_proj | 3188.567 | - | - | 2.149 |
491
+ | mlp.up_proj | 66777.297 | - | - | 2.297 |
492
+ | mlp.gate_proj | 70337.727 | - | - | 1.547 |
493
+ | mlp.down_proj | 24092.455 | - | - | 5.939 |
494
+ +------------------+--------------+------------+-----------+-------+
495
+
496
+
497
+ Quantizing layer 36/40..
498
+ +------------------+--------------+------------+-----------+-------+
499
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
500
+ +==================+==============+============+===========+=======+
501
+ | self_attn.k_proj | 27951.232 | - | - | 2.108 |
502
+ | self_attn.v_proj | 29941.828 | - | - | 1.426 |
503
+ | self_attn.q_proj | 28183.521 | - | - | 1.422 |
504
+ | self_attn.o_proj | 3038.925 | - | - | 2.006 |
505
+ | mlp.up_proj | 68732.734 | - | - | 2.153 |
506
+ | mlp.gate_proj | 71100.156 | - | - | 1.659 |
507
+ | mlp.down_proj | 26909.160 | - | - | 5.484 |
508
+ +------------------+--------------+------------+-----------+-------+
509
+
510
+
511
+ Quantizing layer 37/40..
512
+ +------------------+--------------+------------+-----------+-------+
513
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
514
+ +==================+==============+============+===========+=======+
515
+ | self_attn.k_proj | 27422.418 | - | - | 2.247 |
516
+ | self_attn.v_proj | 32204.926 | - | - | 1.542 |
517
+ | self_attn.q_proj | 27318.453 | - | - | 1.546 |
518
+ | self_attn.o_proj | 2744.674 | - | - | 2.196 |
519
+ | mlp.up_proj | 70397.406 | - | - | 2.310 |
520
+ | mlp.gate_proj | 71945.289 | - | - | 1.640 |
521
+ | mlp.down_proj | 30182.273 | - | - | 6.029 |
522
+ +------------------+--------------+------------+-----------+-------+
523
+
524
+
525
+ Quantizing layer 38/40..
526
+ +------------------+--------------+------------+-----------+-------+
527
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
528
+ +==================+==============+============+===========+=======+
529
+ | self_attn.k_proj | 25726.672 | - | - | 2.424 |
530
+ | self_attn.v_proj | 29666.820 | - | - | 1.542 |
531
+ | self_attn.q_proj | 25691.258 | - | - | 1.530 |
532
+ | self_attn.o_proj | 4238.191 | - | - | 2.473 |
533
+ | mlp.up_proj | 69719.492 | - | - | 2.576 |
534
+ | mlp.gate_proj | 71583.094 | - | - | 1.790 |
535
+ | mlp.down_proj | 35800.488 | - | - | 6.818 |
536
+ +------------------+--------------+------------+-----------+-------+
537
+
538
+
539
+ Quantizing layer 39/40..
540
+ +------------------+--------------+------------+-----------+-------+
541
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
542
+ +==================+==============+============+===========+=======+
543
+ | self_attn.k_proj | 23775.566 | - | - | 2.246 |
544
+ | self_attn.v_proj | 28923.490 | - | - | 1.677 |
545
+ | self_attn.q_proj | 24229.426 | - | - | 1.687 |
546
+ | self_attn.o_proj | 8112.882 | - | - | 2.424 |
547
+ | mlp.up_proj | 63641.328 | - | - | 2.540 |
548
+ | mlp.gate_proj | 66049.242 | - | - | 1.721 |
549
+ | mlp.down_proj | 47443.551 | - | - | 6.712 |
550
+ +------------------+--------------+------------+-----------+-------+
551
+
552
+
553
+ Quantizing layer 40/40..
554
+ +------------------+--------------+------------+-----------+-------+
555
+ | name | weight_error | fp_inp_SNR | q_inp_SNR | time |
556
+ +==================+==============+============+===========+=======+
557
+ | self_attn.k_proj | 15865.144 | - | - | 2.534 |
558
+ | self_attn.v_proj | 16730.523 | - | - | 1.709 |
559
+ | self_attn.q_proj | 15812.807 | - | - | 1.727 |
560
+ | self_attn.o_proj | 3682.083 | - | - | 2.429 |
561
+ | mlp.up_proj | 50250.707 | - | - | 2.543 |
562
+ | mlp.gate_proj | 52429.242 | - | - | 1.738 |
563
+ | mlp.down_proj | 58651.867 | - | - | 6.809 |
564
+ +------------------+--------------+------------+-----------+-------+
565
+
566
+
567
+ 1905.7269313335419
568
+ Packing ...
569
+ model.layers.0.self_attn.k_proj
570
+ model.layers.0.self_attn.o_proj
571
+ model.layers.0.self_attn.q_proj
572
+ model.layers.0.self_attn.v_proj
573
+ model.layers.0.mlp.down_proj
574
+ model.layers.0.mlp.gate_proj
575
+ model.layers.0.mlp.up_proj
576
+ model.layers.1.self_attn.k_proj
577
+ model.layers.1.self_attn.o_proj
578
+ model.layers.1.self_attn.q_proj
579
+ model.layers.1.self_attn.v_proj
580
+ model.layers.1.mlp.down_proj
581
+ model.layers.1.mlp.gate_proj
582
+ model.layers.1.mlp.up_proj
583
+ model.layers.2.self_attn.k_proj
584
+ model.layers.2.self_attn.o_proj
585
+ model.layers.2.self_attn.q_proj
586
+ model.layers.2.self_attn.v_proj
587
+ model.layers.2.mlp.down_proj
588
+ model.layers.2.mlp.gate_proj
589
+ model.layers.2.mlp.up_proj
590
+ model.layers.3.self_attn.k_proj
591
+ model.layers.3.self_attn.o_proj
592
+ model.layers.3.self_attn.q_proj
593
+ model.layers.3.self_attn.v_proj
594
+ model.layers.3.mlp.down_proj
595
+ model.layers.3.mlp.gate_proj
596
+ model.layers.3.mlp.up_proj
597
+ model.layers.4.self_attn.k_proj
598
+ model.layers.4.self_attn.o_proj
599
+ model.layers.4.self_attn.q_proj
600
+ model.layers.4.self_attn.v_proj
601
+ model.layers.4.mlp.down_proj
602
+ model.layers.4.mlp.gate_proj
603
+ model.layers.4.mlp.up_proj
604
+ model.layers.5.self_attn.k_proj
605
+ model.layers.5.self_attn.o_proj
606
+ model.layers.5.self_attn.q_proj
607
+ model.layers.5.self_attn.v_proj
608
+ model.layers.5.mlp.down_proj
609
+ model.layers.5.mlp.gate_proj
610
+ model.layers.5.mlp.up_proj
611
+ model.layers.6.self_attn.k_proj
612
+ model.layers.6.self_attn.o_proj
613
+ model.layers.6.self_attn.q_proj
614
+ model.layers.6.self_attn.v_proj
615
+ model.layers.6.mlp.down_proj
616
+ model.layers.6.mlp.gate_proj
617
+ model.layers.6.mlp.up_proj
618
+ model.layers.7.self_attn.k_proj
619
+ model.layers.7.self_attn.o_proj
620
+ model.layers.7.self_attn.q_proj
621
+ model.layers.7.self_attn.v_proj
622
+ model.layers.7.mlp.down_proj
623
+ model.layers.7.mlp.gate_proj
624
+ model.layers.7.mlp.up_proj
625
+ model.layers.8.self_attn.k_proj
626
+ model.layers.8.self_attn.o_proj
627
+ model.layers.8.self_attn.q_proj
628
+ model.layers.8.self_attn.v_proj
629
+ model.layers.8.mlp.down_proj
630
+ model.layers.8.mlp.gate_proj
631
+ model.layers.8.mlp.up_proj
632
+ model.layers.9.self_attn.k_proj
633
+ model.layers.9.self_attn.o_proj
634
+ model.layers.9.self_attn.q_proj
635
+ model.layers.9.self_attn.v_proj
636
+ model.layers.9.mlp.down_proj
637
+ model.layers.9.mlp.gate_proj
638
+ model.layers.9.mlp.up_proj
639
+ model.layers.10.self_attn.k_proj
640
+ model.layers.10.self_attn.o_proj
641
+ model.layers.10.self_attn.q_proj
642
+ model.layers.10.self_attn.v_proj
643
+ model.layers.10.mlp.down_proj
644
+ model.layers.10.mlp.gate_proj
645
+ model.layers.10.mlp.up_proj
646
+ model.layers.11.self_attn.k_proj
647
+ model.layers.11.self_attn.o_proj
648
+ model.layers.11.self_attn.q_proj
649
+ model.layers.11.self_attn.v_proj
650
+ model.layers.11.mlp.down_proj
651
+ model.layers.11.mlp.gate_proj
652
+ model.layers.11.mlp.up_proj
653
+ model.layers.12.self_attn.k_proj
654
+ model.layers.12.self_attn.o_proj
655
+ model.layers.12.self_attn.q_proj
656
+ model.layers.12.self_attn.v_proj
657
+ model.layers.12.mlp.down_proj
658
+ model.layers.12.mlp.gate_proj
659
+ model.layers.12.mlp.up_proj
660
+ model.layers.13.self_attn.k_proj
661
+ model.layers.13.self_attn.o_proj
662
+ model.layers.13.self_attn.q_proj
663
+ model.layers.13.self_attn.v_proj
664
+ model.layers.13.mlp.down_proj
665
+ model.layers.13.mlp.gate_proj
666
+ model.layers.13.mlp.up_proj
667
+ model.layers.14.self_attn.k_proj
668
+ model.layers.14.self_attn.o_proj
669
+ model.layers.14.self_attn.q_proj
670
+ model.layers.14.self_attn.v_proj
671
+ model.layers.14.mlp.down_proj
672
+ model.layers.14.mlp.gate_proj
673
+ model.layers.14.mlp.up_proj
674
+ model.layers.15.self_attn.k_proj
675
+ model.layers.15.self_attn.o_proj
676
+ model.layers.15.self_attn.q_proj
677
+ model.layers.15.self_attn.v_proj
678
+ model.layers.15.mlp.down_proj
679
+ model.layers.15.mlp.gate_proj
680
+ model.layers.15.mlp.up_proj
681
+ model.layers.16.self_attn.k_proj
682
+ model.layers.16.self_attn.o_proj
683
+ model.layers.16.self_attn.q_proj
684
+ model.layers.16.self_attn.v_proj
685
+ model.layers.16.mlp.down_proj
686
+ model.layers.16.mlp.gate_proj
687
+ model.layers.16.mlp.up_proj
688
+ model.layers.17.self_attn.k_proj
689
+ model.layers.17.self_attn.o_proj
690
+ model.layers.17.self_attn.q_proj
691
+ model.layers.17.self_attn.v_proj
692
+ model.layers.17.mlp.down_proj
693
+ model.layers.17.mlp.gate_proj
694
+ model.layers.17.mlp.up_proj
695
+ model.layers.18.self_attn.k_proj
696
+ model.layers.18.self_attn.o_proj
697
+ model.layers.18.self_attn.q_proj
698
+ model.layers.18.self_attn.v_proj
699
+ model.layers.18.mlp.down_proj
700
+ model.layers.18.mlp.gate_proj
701
+ model.layers.18.mlp.up_proj
702
+ model.layers.19.self_attn.k_proj
703
+ model.layers.19.self_attn.o_proj
704
+ model.layers.19.self_attn.q_proj
705
+ model.layers.19.self_attn.v_proj
706
+ model.layers.19.mlp.down_proj
707
+ model.layers.19.mlp.gate_proj
708
+ model.layers.19.mlp.up_proj
709
+ model.layers.20.self_attn.k_proj
710
+ model.layers.20.self_attn.o_proj
711
+ model.layers.20.self_attn.q_proj
712
+ model.layers.20.self_attn.v_proj
713
+ model.layers.20.mlp.down_proj
714
+ model.layers.20.mlp.gate_proj
715
+ model.layers.20.mlp.up_proj
716
+ model.layers.21.self_attn.k_proj
717
+ model.layers.21.self_attn.o_proj
718
+ model.layers.21.self_attn.q_proj
719
+ model.layers.21.self_attn.v_proj
720
+ model.layers.21.mlp.down_proj
721
+ model.layers.21.mlp.gate_proj
722
+ model.layers.21.mlp.up_proj
723
+ model.layers.22.self_attn.k_proj
724
+ model.layers.22.self_attn.o_proj
725
+ model.layers.22.self_attn.q_proj
726
+ model.layers.22.self_attn.v_proj
727
+ model.layers.22.mlp.down_proj
728
+ model.layers.22.mlp.gate_proj
729
+ model.layers.22.mlp.up_proj
730
+ model.layers.23.self_attn.k_proj
731
+ model.layers.23.self_attn.o_proj
732
+ model.layers.23.self_attn.q_proj
733
+ model.layers.23.self_attn.v_proj
734
+ model.layers.23.mlp.down_proj
735
+ model.layers.23.mlp.gate_proj
736
+ model.layers.23.mlp.up_proj
737
+ model.layers.24.self_attn.k_proj
738
+ model.layers.24.self_attn.o_proj
739
+ model.layers.24.self_attn.q_proj
740
+ model.layers.24.self_attn.v_proj
741
+ model.layers.24.mlp.down_proj
742
+ model.layers.24.mlp.gate_proj
743
+ model.layers.24.mlp.up_proj
744
+ model.layers.25.self_attn.k_proj
745
+ model.layers.25.self_attn.o_proj
746
+ model.layers.25.self_attn.q_proj
747
+ model.layers.25.self_attn.v_proj
748
+ model.layers.25.mlp.down_proj
749
+ model.layers.25.mlp.gate_proj
750
+ model.layers.25.mlp.up_proj
751
+ model.layers.26.self_attn.k_proj
752
+ model.layers.26.self_attn.o_proj
753
+ model.layers.26.self_attn.q_proj
754
+ model.layers.26.self_attn.v_proj
755
+ model.layers.26.mlp.down_proj
756
+ model.layers.26.mlp.gate_proj
757
+ model.layers.26.mlp.up_proj
758
+ model.layers.27.self_attn.k_proj
759
+ model.layers.27.self_attn.o_proj
760
+ model.layers.27.self_attn.q_proj
761
+ model.layers.27.self_attn.v_proj
762
+ model.layers.27.mlp.down_proj
763
+ model.layers.27.mlp.gate_proj
764
+ model.layers.27.mlp.up_proj
765
+ model.layers.28.self_attn.k_proj
766
+ model.layers.28.self_attn.o_proj
767
+ model.layers.28.self_attn.q_proj
768
+ model.layers.28.self_attn.v_proj
769
+ model.layers.28.mlp.down_proj
770
+ model.layers.28.mlp.gate_proj
771
+ model.layers.28.mlp.up_proj
772
+ model.layers.29.self_attn.k_proj
773
+ model.layers.29.self_attn.o_proj
774
+ model.layers.29.self_attn.q_proj
775
+ model.layers.29.self_attn.v_proj
776
+ model.layers.29.mlp.down_proj
777
+ model.layers.29.mlp.gate_proj
778
+ model.layers.29.mlp.up_proj
779
+ model.layers.30.self_attn.k_proj
780
+ model.layers.30.self_attn.o_proj
781
+ model.layers.30.self_attn.q_proj
782
+ model.layers.30.self_attn.v_proj
783
+ model.layers.30.mlp.down_proj
784
+ model.layers.30.mlp.gate_proj
785
+ model.layers.30.mlp.up_proj
786
+ model.layers.31.self_attn.k_proj
787
+ model.layers.31.self_attn.o_proj
788
+ model.layers.31.self_attn.q_proj
789
+ model.layers.31.self_attn.v_proj
790
+ model.layers.31.mlp.down_proj
791
+ model.layers.31.mlp.gate_proj
792
+ model.layers.31.mlp.up_proj
793
+ model.layers.32.self_attn.k_proj
794
+ model.layers.32.self_attn.o_proj
795
+ model.layers.32.self_attn.q_proj
796
+ model.layers.32.self_attn.v_proj
797
+ model.layers.32.mlp.down_proj
798
+ model.layers.32.mlp.gate_proj
799
+ model.layers.32.mlp.up_proj
800
+ model.layers.33.self_attn.k_proj
801
+ model.layers.33.self_attn.o_proj
802
+ model.layers.33.self_attn.q_proj
803
+ model.layers.33.self_attn.v_proj
804
+ model.layers.33.mlp.down_proj
805
+ model.layers.33.mlp.gate_proj
806
+ model.layers.33.mlp.up_proj
807
+ model.layers.34.self_attn.k_proj
808
+ model.layers.34.self_attn.o_proj
809
+ model.layers.34.self_attn.q_proj
810
+ model.layers.34.self_attn.v_proj
811
+ model.layers.34.mlp.down_proj
812
+ model.layers.34.mlp.gate_proj
813
+ model.layers.34.mlp.up_proj
814
+ model.layers.35.self_attn.k_proj
815
+ model.layers.35.self_attn.o_proj
816
+ model.layers.35.self_attn.q_proj
817
+ model.layers.35.self_attn.v_proj
818
+ model.layers.35.mlp.down_proj
819
+ model.layers.35.mlp.gate_proj
820
+ model.layers.35.mlp.up_proj
821
+ model.layers.36.self_attn.k_proj
822
+ model.layers.36.self_attn.o_proj
823
+ model.layers.36.self_attn.q_proj
824
+ model.layers.36.self_attn.v_proj
825
+ model.layers.36.mlp.down_proj
826
+ model.layers.36.mlp.gate_proj
827
+ model.layers.36.mlp.up_proj
828
+ model.layers.37.self_attn.k_proj
829
+ model.layers.37.self_attn.o_proj
830
+ model.layers.37.self_attn.q_proj
831
+ model.layers.37.self_attn.v_proj
832
+ model.layers.37.mlp.down_proj
833
+ model.layers.37.mlp.gate_proj
834
+ model.layers.37.mlp.up_proj
835
+ model.layers.38.self_attn.k_proj
836
+ model.layers.38.self_attn.o_proj
837
+ model.layers.38.self_attn.q_proj
838
+ model.layers.38.self_attn.v_proj
839
+ model.layers.38.mlp.down_proj
840
+ model.layers.38.mlp.gate_proj
841
+ model.layers.38.mlp.up_proj
842
+ model.layers.39.self_attn.k_proj
843
+ model.layers.39.self_attn.o_proj
844
+ model.layers.39.self_attn.q_proj
845
+ model.layers.39.self_attn.v_proj
846
+ model.layers.39.mlp.down_proj
847
+ model.layers.39.mlp.gate_proj
848
+ model.layers.39.mlp.up_proj
849
+ Done.
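The table format (weight_error / fp_inp_SNR / q_inp_SNR columns) and the cached allenai/c4 calibration data match GPTQ-for-LLaMa-style tooling; the bare number near the end (1905.72...) appears to be the total quantization wall-clock time in seconds, roughly 32 minutes for all 40 layers. The exact command is not recorded in the log, so the following is only a sketch of an equivalent 4-bit, group-size-128 GPTQ run using the AutoGPTQ library, a different tool than the one that produced this output; the source model path and calibration text are assumptions:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

base = "openaccess-ai-collective/manticore-13b"  # assumed fp16 source; not stated in the log
tokenizer = AutoTokenizer.from_pretrained(base, use_fast=False)

# 4-bit weights, group size 128, matching the "-4bit-128g" naming of this repo.
quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
model = AutoGPTQForCausalLM.from_pretrained(base, quantize_config)

# Calibration data: the log shows C4 samples being used; a single sentence here
# keeps the sketch short (a real run would pass on the order of 128 C4 excerpts).
examples = [tokenizer("The quick brown fox jumps over the lazy dog.", return_tensors="pt")]

model.quantize(examples)
model.save_quantized("manticore-13b-4bit-128g", use_safetensors=True)
```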
manticore-13b-4bit-128g.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29b3092a7e667c806a69ad33099d0afe636c40614ae45e7d050a000b22b08db1
+ size 7255159218
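Only the Git LFS pointer shows in the diff; the actual safetensors file is about 7.3 GB (7,255,159,218 bytes). A loading sketch using AutoGPTQ (an assumption: any GPTQ-compatible loader such as text-generation-webui should also work). Because the repo ships no quantize_config.json, the 4-bit / group-size-128 settings and the file basename are passed explicitly:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

repo = "plabadens/manticore-13b-4bit-128g"  # hypothetical repo id for this commit
tokenizer = AutoTokenizer.from_pretrained(repo, use_fast=False)

model = AutoGPTQForCausalLM.from_quantized(
    repo,
    model_basename="manticore-13b-4bit-128g",
    quantize_config=BaseQuantizeConfig(bits=4, group_size=128),
    use_safetensors=True,
    device="cuda:0",
)

prompt = "Explain GPTQ quantization in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=64)[0]))
```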
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 2048,
+   "pad_token": null,
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
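This is the standard LLaMA SentencePiece tokenizer: add_bos_token is true and add_eos_token is false, so encoded prompts start with <s> and do not get a trailing </s>. A small sketch showing that behaviour (the repo id is an assumption; a local directory with the files above works the same):

```python
from transformers import LlamaTokenizer

tok = LlamaTokenizer.from_pretrained("plabadens/manticore-13b-4bit-128g")

ids = tok("Hello, world!").input_ids
print(ids[0] == tok.bos_token_id)   # True  -> <s> prepended (add_bos_token: true)
print(ids[-1] == tok.eos_token_id)  # False -> no </s> appended (add_eos_token: false)
print(tok.model_max_length)         # 2048
```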