ColinZ22 committed on
Commit
9770fa2
·
verified ·
1 Parent(s): 7e388a5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. chat_template.jinja +2 -2
  2. config.json +3 -229
  3. model-00005-of-00282.safetensors +2 -2
  4. model-00009-of-00282.safetensors +2 -2
  5. model-00012-of-00282.safetensors +2 -2
  6. model-00013-of-00282.safetensors +2 -2
  7. model-00016-of-00282.safetensors +2 -2
  8. model-00020-of-00282.safetensors +2 -2
  9. model-00024-of-00282.safetensors +2 -2
  10. model-00027-of-00282.safetensors +2 -2
  11. model-00031-of-00282.safetensors +2 -2
  12. model-00035-of-00282.safetensors +2 -2
  13. model-00038-of-00282.safetensors +2 -2
  14. model-00042-of-00282.safetensors +2 -2
  15. model-00046-of-00282.safetensors +2 -2
  16. model-00049-of-00282.safetensors +2 -2
  17. model-00050-of-00282.safetensors +2 -2
  18. model-00053-of-00282.safetensors +2 -2
  19. model-00057-of-00282.safetensors +2 -2
  20. model-00061-of-00282.safetensors +2 -2
  21. model-00064-of-00282.safetensors +2 -2
  22. model-00068-of-00282.safetensors +2 -2
  23. model-00072-of-00282.safetensors +2 -2
  24. model-00075-of-00282.safetensors +2 -2
  25. model-00079-of-00282.safetensors +2 -2
  26. model-00083-of-00282.safetensors +2 -2
  27. model-00086-of-00282.safetensors +2 -2
  28. model-00090-of-00282.safetensors +2 -2
  29. model-00094-of-00282.safetensors +2 -2
  30. model-00097-of-00282.safetensors +2 -2
  31. model-00101-of-00282.safetensors +2 -2
  32. model-00105-of-00282.safetensors +2 -2
  33. model-00108-of-00282.safetensors +2 -2
  34. model-00112-of-00282.safetensors +2 -2
  35. model-00116-of-00282.safetensors +2 -2
  36. model-00120-of-00282.safetensors +2 -2
  37. model-00123-of-00282.safetensors +2 -2
  38. model-00127-of-00282.safetensors +2 -2
  39. model-00131-of-00282.safetensors +2 -2
  40. model-00134-of-00282.safetensors +2 -2
  41. model-00138-of-00282.safetensors +2 -2
  42. model-00142-of-00282.safetensors +2 -2
  43. model-00145-of-00282.safetensors +2 -2
  44. model-00149-of-00282.safetensors +2 -2
  45. model-00153-of-00282.safetensors +2 -2
  46. model-00156-of-00282.safetensors +2 -2
  47. model-00160-of-00282.safetensors +2 -2
  48. model-00164-of-00282.safetensors +2 -2
  49. model-00167-of-00282.safetensors +2 -2
  50. model-00171-of-00282.safetensors +2 -2
chat_template.jinja CHANGED
@@ -32,10 +32,10 @@ For each function call, output the function name and arguments within the follow
32
  {%- set ns = namespace(last_user_index=-1) %}
33
  {%- for m in messages %}
34
  {%- if m.role == 'user' %}
35
- {% set ns.last_user_index = loop.index0 -%}
36
  {%- endif %}
37
  {%- endfor %}
38
- {% for m in messages %}
39
  {%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
40
  {%- elif m.role == 'assistant' -%}
41
  <|assistant|>
 
32
  {%- set ns = namespace(last_user_index=-1) %}
33
  {%- for m in messages %}
34
  {%- if m.role == 'user' %}
35
+ {%- set ns.last_user_index = loop.index0 -%}
36
  {%- endif %}
37
  {%- endfor %}
38
+ {%- for m in messages -%}
39
  {%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
40
  {%- elif m.role == 'assistant' -%}
41
  <|assistant|>
config.json CHANGED
@@ -64,6 +64,7 @@
64
  "qscheme": "per_group",
65
  "ch_axis": -1,
66
  "group_size": 32,
 
67
  "symmetric": null,
68
  "round_method": "half_even",
69
  "scale_type": "float",
@@ -80,6 +81,7 @@
80
  "qscheme": "per_group",
81
  "ch_axis": -1,
82
  "group_size": 32,
 
83
  "symmetric": null,
84
  "round_method": "half_even",
85
  "scale_type": "float",
@@ -117,9 +119,6 @@
117
  "model.layers.1.self_attn.q_a_proj",
118
  "model.layers.1.self_attn.q_b_proj",
119
  "model.layers.10.mlp.gate",
120
- "model.layers.10.mlp.shared_experts.down_proj",
121
- "model.layers.10.mlp.shared_experts.gate_proj",
122
- "model.layers.10.mlp.shared_experts.up_proj",
123
  "model.layers.10.self_attn.indexer.weights_proj",
124
  "model.layers.10.self_attn.indexer.wk",
125
  "model.layers.10.self_attn.indexer.wq_b",
@@ -129,9 +128,6 @@
129
  "model.layers.10.self_attn.q_a_proj",
130
  "model.layers.10.self_attn.q_b_proj",
131
  "model.layers.11.mlp.gate",
132
- "model.layers.11.mlp.shared_experts.down_proj",
133
- "model.layers.11.mlp.shared_experts.gate_proj",
134
- "model.layers.11.mlp.shared_experts.up_proj",
135
  "model.layers.11.self_attn.indexer.weights_proj",
136
  "model.layers.11.self_attn.indexer.wk",
137
  "model.layers.11.self_attn.indexer.wq_b",
@@ -141,9 +137,6 @@
141
  "model.layers.11.self_attn.q_a_proj",
142
  "model.layers.11.self_attn.q_b_proj",
143
  "model.layers.12.mlp.gate",
144
- "model.layers.12.mlp.shared_experts.down_proj",
145
- "model.layers.12.mlp.shared_experts.gate_proj",
146
- "model.layers.12.mlp.shared_experts.up_proj",
147
  "model.layers.12.self_attn.indexer.weights_proj",
148
  "model.layers.12.self_attn.indexer.wk",
149
  "model.layers.12.self_attn.indexer.wq_b",
@@ -153,9 +146,6 @@
153
  "model.layers.12.self_attn.q_a_proj",
154
  "model.layers.12.self_attn.q_b_proj",
155
  "model.layers.13.mlp.gate",
156
- "model.layers.13.mlp.shared_experts.down_proj",
157
- "model.layers.13.mlp.shared_experts.gate_proj",
158
- "model.layers.13.mlp.shared_experts.up_proj",
159
  "model.layers.13.self_attn.indexer.weights_proj",
160
  "model.layers.13.self_attn.indexer.wk",
161
  "model.layers.13.self_attn.indexer.wq_b",
@@ -165,9 +155,6 @@
165
  "model.layers.13.self_attn.q_a_proj",
166
  "model.layers.13.self_attn.q_b_proj",
167
  "model.layers.14.mlp.gate",
168
- "model.layers.14.mlp.shared_experts.down_proj",
169
- "model.layers.14.mlp.shared_experts.gate_proj",
170
- "model.layers.14.mlp.shared_experts.up_proj",
171
  "model.layers.14.self_attn.indexer.weights_proj",
172
  "model.layers.14.self_attn.indexer.wk",
173
  "model.layers.14.self_attn.indexer.wq_b",
@@ -177,9 +164,6 @@
177
  "model.layers.14.self_attn.q_a_proj",
178
  "model.layers.14.self_attn.q_b_proj",
179
  "model.layers.15.mlp.gate",
180
- "model.layers.15.mlp.shared_experts.down_proj",
181
- "model.layers.15.mlp.shared_experts.gate_proj",
182
- "model.layers.15.mlp.shared_experts.up_proj",
183
  "model.layers.15.self_attn.indexer.weights_proj",
184
  "model.layers.15.self_attn.indexer.wk",
185
  "model.layers.15.self_attn.indexer.wq_b",
@@ -189,9 +173,6 @@
189
  "model.layers.15.self_attn.q_a_proj",
190
  "model.layers.15.self_attn.q_b_proj",
191
  "model.layers.16.mlp.gate",
192
- "model.layers.16.mlp.shared_experts.down_proj",
193
- "model.layers.16.mlp.shared_experts.gate_proj",
194
- "model.layers.16.mlp.shared_experts.up_proj",
195
  "model.layers.16.self_attn.indexer.weights_proj",
196
  "model.layers.16.self_attn.indexer.wk",
197
  "model.layers.16.self_attn.indexer.wq_b",
@@ -201,9 +182,6 @@
201
  "model.layers.16.self_attn.q_a_proj",
202
  "model.layers.16.self_attn.q_b_proj",
203
  "model.layers.17.mlp.gate",
204
- "model.layers.17.mlp.shared_experts.down_proj",
205
- "model.layers.17.mlp.shared_experts.gate_proj",
206
- "model.layers.17.mlp.shared_experts.up_proj",
207
  "model.layers.17.self_attn.indexer.weights_proj",
208
  "model.layers.17.self_attn.indexer.wk",
209
  "model.layers.17.self_attn.indexer.wq_b",
@@ -213,9 +191,6 @@
213
  "model.layers.17.self_attn.q_a_proj",
214
  "model.layers.17.self_attn.q_b_proj",
215
  "model.layers.18.mlp.gate",
216
- "model.layers.18.mlp.shared_experts.down_proj",
217
- "model.layers.18.mlp.shared_experts.gate_proj",
218
- "model.layers.18.mlp.shared_experts.up_proj",
219
  "model.layers.18.self_attn.indexer.weights_proj",
220
  "model.layers.18.self_attn.indexer.wk",
221
  "model.layers.18.self_attn.indexer.wq_b",
@@ -225,9 +200,6 @@
225
  "model.layers.18.self_attn.q_a_proj",
226
  "model.layers.18.self_attn.q_b_proj",
227
  "model.layers.19.mlp.gate",
228
- "model.layers.19.mlp.shared_experts.down_proj",
229
- "model.layers.19.mlp.shared_experts.gate_proj",
230
- "model.layers.19.mlp.shared_experts.up_proj",
231
  "model.layers.19.self_attn.indexer.weights_proj",
232
  "model.layers.19.self_attn.indexer.wk",
233
  "model.layers.19.self_attn.indexer.wq_b",
@@ -248,9 +220,6 @@
248
  "model.layers.2.self_attn.q_a_proj",
249
  "model.layers.2.self_attn.q_b_proj",
250
  "model.layers.20.mlp.gate",
251
- "model.layers.20.mlp.shared_experts.down_proj",
252
- "model.layers.20.mlp.shared_experts.gate_proj",
253
- "model.layers.20.mlp.shared_experts.up_proj",
254
  "model.layers.20.self_attn.indexer.weights_proj",
255
  "model.layers.20.self_attn.indexer.wk",
256
  "model.layers.20.self_attn.indexer.wq_b",
@@ -260,9 +229,6 @@
260
  "model.layers.20.self_attn.q_a_proj",
261
  "model.layers.20.self_attn.q_b_proj",
262
  "model.layers.21.mlp.gate",
263
- "model.layers.21.mlp.shared_experts.down_proj",
264
- "model.layers.21.mlp.shared_experts.gate_proj",
265
- "model.layers.21.mlp.shared_experts.up_proj",
266
  "model.layers.21.self_attn.indexer.weights_proj",
267
  "model.layers.21.self_attn.indexer.wk",
268
  "model.layers.21.self_attn.indexer.wq_b",
@@ -272,9 +238,6 @@
272
  "model.layers.21.self_attn.q_a_proj",
273
  "model.layers.21.self_attn.q_b_proj",
274
  "model.layers.22.mlp.gate",
275
- "model.layers.22.mlp.shared_experts.down_proj",
276
- "model.layers.22.mlp.shared_experts.gate_proj",
277
- "model.layers.22.mlp.shared_experts.up_proj",
278
  "model.layers.22.self_attn.indexer.weights_proj",
279
  "model.layers.22.self_attn.indexer.wk",
280
  "model.layers.22.self_attn.indexer.wq_b",
@@ -284,9 +247,6 @@
284
  "model.layers.22.self_attn.q_a_proj",
285
  "model.layers.22.self_attn.q_b_proj",
286
  "model.layers.23.mlp.gate",
287
- "model.layers.23.mlp.shared_experts.down_proj",
288
- "model.layers.23.mlp.shared_experts.gate_proj",
289
- "model.layers.23.mlp.shared_experts.up_proj",
290
  "model.layers.23.self_attn.indexer.weights_proj",
291
  "model.layers.23.self_attn.indexer.wk",
292
  "model.layers.23.self_attn.indexer.wq_b",
@@ -296,9 +256,6 @@
296
  "model.layers.23.self_attn.q_a_proj",
297
  "model.layers.23.self_attn.q_b_proj",
298
  "model.layers.24.mlp.gate",
299
- "model.layers.24.mlp.shared_experts.down_proj",
300
- "model.layers.24.mlp.shared_experts.gate_proj",
301
- "model.layers.24.mlp.shared_experts.up_proj",
302
  "model.layers.24.self_attn.indexer.weights_proj",
303
  "model.layers.24.self_attn.indexer.wk",
304
  "model.layers.24.self_attn.indexer.wq_b",
@@ -308,9 +265,6 @@
308
  "model.layers.24.self_attn.q_a_proj",
309
  "model.layers.24.self_attn.q_b_proj",
310
  "model.layers.25.mlp.gate",
311
- "model.layers.25.mlp.shared_experts.down_proj",
312
- "model.layers.25.mlp.shared_experts.gate_proj",
313
- "model.layers.25.mlp.shared_experts.up_proj",
314
  "model.layers.25.self_attn.indexer.weights_proj",
315
  "model.layers.25.self_attn.indexer.wk",
316
  "model.layers.25.self_attn.indexer.wq_b",
@@ -320,9 +274,6 @@
320
  "model.layers.25.self_attn.q_a_proj",
321
  "model.layers.25.self_attn.q_b_proj",
322
  "model.layers.26.mlp.gate",
323
- "model.layers.26.mlp.shared_experts.down_proj",
324
- "model.layers.26.mlp.shared_experts.gate_proj",
325
- "model.layers.26.mlp.shared_experts.up_proj",
326
  "model.layers.26.self_attn.indexer.weights_proj",
327
  "model.layers.26.self_attn.indexer.wk",
328
  "model.layers.26.self_attn.indexer.wq_b",
@@ -332,9 +283,6 @@
332
  "model.layers.26.self_attn.q_a_proj",
333
  "model.layers.26.self_attn.q_b_proj",
334
  "model.layers.27.mlp.gate",
335
- "model.layers.27.mlp.shared_experts.down_proj",
336
- "model.layers.27.mlp.shared_experts.gate_proj",
337
- "model.layers.27.mlp.shared_experts.up_proj",
338
  "model.layers.27.self_attn.indexer.weights_proj",
339
  "model.layers.27.self_attn.indexer.wk",
340
  "model.layers.27.self_attn.indexer.wq_b",
@@ -344,9 +292,6 @@
344
  "model.layers.27.self_attn.q_a_proj",
345
  "model.layers.27.self_attn.q_b_proj",
346
  "model.layers.28.mlp.gate",
347
- "model.layers.28.mlp.shared_experts.down_proj",
348
- "model.layers.28.mlp.shared_experts.gate_proj",
349
- "model.layers.28.mlp.shared_experts.up_proj",
350
  "model.layers.28.self_attn.indexer.weights_proj",
351
  "model.layers.28.self_attn.indexer.wk",
352
  "model.layers.28.self_attn.indexer.wq_b",
@@ -356,9 +301,6 @@
356
  "model.layers.28.self_attn.q_a_proj",
357
  "model.layers.28.self_attn.q_b_proj",
358
  "model.layers.29.mlp.gate",
359
- "model.layers.29.mlp.shared_experts.down_proj",
360
- "model.layers.29.mlp.shared_experts.gate_proj",
361
- "model.layers.29.mlp.shared_experts.up_proj",
362
  "model.layers.29.self_attn.indexer.weights_proj",
363
  "model.layers.29.self_attn.indexer.wk",
364
  "model.layers.29.self_attn.indexer.wq_b",
@@ -368,9 +310,6 @@
368
  "model.layers.29.self_attn.q_a_proj",
369
  "model.layers.29.self_attn.q_b_proj",
370
  "model.layers.3.mlp.gate",
371
- "model.layers.3.mlp.shared_experts.down_proj",
372
- "model.layers.3.mlp.shared_experts.gate_proj",
373
- "model.layers.3.mlp.shared_experts.up_proj",
374
  "model.layers.3.self_attn.indexer.weights_proj",
375
  "model.layers.3.self_attn.indexer.wk",
376
  "model.layers.3.self_attn.indexer.wq_b",
@@ -380,9 +319,6 @@
380
  "model.layers.3.self_attn.q_a_proj",
381
  "model.layers.3.self_attn.q_b_proj",
382
  "model.layers.30.mlp.gate",
383
- "model.layers.30.mlp.shared_experts.down_proj",
384
- "model.layers.30.mlp.shared_experts.gate_proj",
385
- "model.layers.30.mlp.shared_experts.up_proj",
386
  "model.layers.30.self_attn.indexer.weights_proj",
387
  "model.layers.30.self_attn.indexer.wk",
388
  "model.layers.30.self_attn.indexer.wq_b",
@@ -392,9 +328,6 @@
392
  "model.layers.30.self_attn.q_a_proj",
393
  "model.layers.30.self_attn.q_b_proj",
394
  "model.layers.31.mlp.gate",
395
- "model.layers.31.mlp.shared_experts.down_proj",
396
- "model.layers.31.mlp.shared_experts.gate_proj",
397
- "model.layers.31.mlp.shared_experts.up_proj",
398
  "model.layers.31.self_attn.indexer.weights_proj",
399
  "model.layers.31.self_attn.indexer.wk",
400
  "model.layers.31.self_attn.indexer.wq_b",
@@ -404,9 +337,6 @@
404
  "model.layers.31.self_attn.q_a_proj",
405
  "model.layers.31.self_attn.q_b_proj",
406
  "model.layers.32.mlp.gate",
407
- "model.layers.32.mlp.shared_experts.down_proj",
408
- "model.layers.32.mlp.shared_experts.gate_proj",
409
- "model.layers.32.mlp.shared_experts.up_proj",
410
  "model.layers.32.self_attn.indexer.weights_proj",
411
  "model.layers.32.self_attn.indexer.wk",
412
  "model.layers.32.self_attn.indexer.wq_b",
@@ -416,9 +346,6 @@
416
  "model.layers.32.self_attn.q_a_proj",
417
  "model.layers.32.self_attn.q_b_proj",
418
  "model.layers.33.mlp.gate",
419
- "model.layers.33.mlp.shared_experts.down_proj",
420
- "model.layers.33.mlp.shared_experts.gate_proj",
421
- "model.layers.33.mlp.shared_experts.up_proj",
422
  "model.layers.33.self_attn.indexer.weights_proj",
423
  "model.layers.33.self_attn.indexer.wk",
424
  "model.layers.33.self_attn.indexer.wq_b",
@@ -428,9 +355,6 @@
428
  "model.layers.33.self_attn.q_a_proj",
429
  "model.layers.33.self_attn.q_b_proj",
430
  "model.layers.34.mlp.gate",
431
- "model.layers.34.mlp.shared_experts.down_proj",
432
- "model.layers.34.mlp.shared_experts.gate_proj",
433
- "model.layers.34.mlp.shared_experts.up_proj",
434
  "model.layers.34.self_attn.indexer.weights_proj",
435
  "model.layers.34.self_attn.indexer.wk",
436
  "model.layers.34.self_attn.indexer.wq_b",
@@ -440,9 +364,6 @@
440
  "model.layers.34.self_attn.q_a_proj",
441
  "model.layers.34.self_attn.q_b_proj",
442
  "model.layers.35.mlp.gate",
443
- "model.layers.35.mlp.shared_experts.down_proj",
444
- "model.layers.35.mlp.shared_experts.gate_proj",
445
- "model.layers.35.mlp.shared_experts.up_proj",
446
  "model.layers.35.self_attn.indexer.weights_proj",
447
  "model.layers.35.self_attn.indexer.wk",
448
  "model.layers.35.self_attn.indexer.wq_b",
@@ -452,9 +373,6 @@
452
  "model.layers.35.self_attn.q_a_proj",
453
  "model.layers.35.self_attn.q_b_proj",
454
  "model.layers.36.mlp.gate",
455
- "model.layers.36.mlp.shared_experts.down_proj",
456
- "model.layers.36.mlp.shared_experts.gate_proj",
457
- "model.layers.36.mlp.shared_experts.up_proj",
458
  "model.layers.36.self_attn.indexer.weights_proj",
459
  "model.layers.36.self_attn.indexer.wk",
460
  "model.layers.36.self_attn.indexer.wq_b",
@@ -464,9 +382,6 @@
464
  "model.layers.36.self_attn.q_a_proj",
465
  "model.layers.36.self_attn.q_b_proj",
466
  "model.layers.37.mlp.gate",
467
- "model.layers.37.mlp.shared_experts.down_proj",
468
- "model.layers.37.mlp.shared_experts.gate_proj",
469
- "model.layers.37.mlp.shared_experts.up_proj",
470
  "model.layers.37.self_attn.indexer.weights_proj",
471
  "model.layers.37.self_attn.indexer.wk",
472
  "model.layers.37.self_attn.indexer.wq_b",
@@ -476,9 +391,6 @@
476
  "model.layers.37.self_attn.q_a_proj",
477
  "model.layers.37.self_attn.q_b_proj",
478
  "model.layers.38.mlp.gate",
479
- "model.layers.38.mlp.shared_experts.down_proj",
480
- "model.layers.38.mlp.shared_experts.gate_proj",
481
- "model.layers.38.mlp.shared_experts.up_proj",
482
  "model.layers.38.self_attn.indexer.weights_proj",
483
  "model.layers.38.self_attn.indexer.wk",
484
  "model.layers.38.self_attn.indexer.wq_b",
@@ -488,9 +400,6 @@
488
  "model.layers.38.self_attn.q_a_proj",
489
  "model.layers.38.self_attn.q_b_proj",
490
  "model.layers.39.mlp.gate",
491
- "model.layers.39.mlp.shared_experts.down_proj",
492
- "model.layers.39.mlp.shared_experts.gate_proj",
493
- "model.layers.39.mlp.shared_experts.up_proj",
494
  "model.layers.39.self_attn.indexer.weights_proj",
495
  "model.layers.39.self_attn.indexer.wk",
496
  "model.layers.39.self_attn.indexer.wq_b",
@@ -500,9 +409,6 @@
500
  "model.layers.39.self_attn.q_a_proj",
501
  "model.layers.39.self_attn.q_b_proj",
502
  "model.layers.4.mlp.gate",
503
- "model.layers.4.mlp.shared_experts.down_proj",
504
- "model.layers.4.mlp.shared_experts.gate_proj",
505
- "model.layers.4.mlp.shared_experts.up_proj",
506
  "model.layers.4.self_attn.indexer.weights_proj",
507
  "model.layers.4.self_attn.indexer.wk",
508
  "model.layers.4.self_attn.indexer.wq_b",
@@ -512,9 +418,6 @@
512
  "model.layers.4.self_attn.q_a_proj",
513
  "model.layers.4.self_attn.q_b_proj",
514
  "model.layers.40.mlp.gate",
515
- "model.layers.40.mlp.shared_experts.down_proj",
516
- "model.layers.40.mlp.shared_experts.gate_proj",
517
- "model.layers.40.mlp.shared_experts.up_proj",
518
  "model.layers.40.self_attn.indexer.weights_proj",
519
  "model.layers.40.self_attn.indexer.wk",
520
  "model.layers.40.self_attn.indexer.wq_b",
@@ -524,9 +427,6 @@
524
  "model.layers.40.self_attn.q_a_proj",
525
  "model.layers.40.self_attn.q_b_proj",
526
  "model.layers.41.mlp.gate",
527
- "model.layers.41.mlp.shared_experts.down_proj",
528
- "model.layers.41.mlp.shared_experts.gate_proj",
529
- "model.layers.41.mlp.shared_experts.up_proj",
530
  "model.layers.41.self_attn.indexer.weights_proj",
531
  "model.layers.41.self_attn.indexer.wk",
532
  "model.layers.41.self_attn.indexer.wq_b",
@@ -536,9 +436,6 @@
536
  "model.layers.41.self_attn.q_a_proj",
537
  "model.layers.41.self_attn.q_b_proj",
538
  "model.layers.42.mlp.gate",
539
- "model.layers.42.mlp.shared_experts.down_proj",
540
- "model.layers.42.mlp.shared_experts.gate_proj",
541
- "model.layers.42.mlp.shared_experts.up_proj",
542
  "model.layers.42.self_attn.indexer.weights_proj",
543
  "model.layers.42.self_attn.indexer.wk",
544
  "model.layers.42.self_attn.indexer.wq_b",
@@ -548,9 +445,6 @@
548
  "model.layers.42.self_attn.q_a_proj",
549
  "model.layers.42.self_attn.q_b_proj",
550
  "model.layers.43.mlp.gate",
551
- "model.layers.43.mlp.shared_experts.down_proj",
552
- "model.layers.43.mlp.shared_experts.gate_proj",
553
- "model.layers.43.mlp.shared_experts.up_proj",
554
  "model.layers.43.self_attn.indexer.weights_proj",
555
  "model.layers.43.self_attn.indexer.wk",
556
  "model.layers.43.self_attn.indexer.wq_b",
@@ -560,9 +454,6 @@
560
  "model.layers.43.self_attn.q_a_proj",
561
  "model.layers.43.self_attn.q_b_proj",
562
  "model.layers.44.mlp.gate",
563
- "model.layers.44.mlp.shared_experts.down_proj",
564
- "model.layers.44.mlp.shared_experts.gate_proj",
565
- "model.layers.44.mlp.shared_experts.up_proj",
566
  "model.layers.44.self_attn.indexer.weights_proj",
567
  "model.layers.44.self_attn.indexer.wk",
568
  "model.layers.44.self_attn.indexer.wq_b",
@@ -572,9 +463,6 @@
572
  "model.layers.44.self_attn.q_a_proj",
573
  "model.layers.44.self_attn.q_b_proj",
574
  "model.layers.45.mlp.gate",
575
- "model.layers.45.mlp.shared_experts.down_proj",
576
- "model.layers.45.mlp.shared_experts.gate_proj",
577
- "model.layers.45.mlp.shared_experts.up_proj",
578
  "model.layers.45.self_attn.indexer.weights_proj",
579
  "model.layers.45.self_attn.indexer.wk",
580
  "model.layers.45.self_attn.indexer.wq_b",
@@ -584,9 +472,6 @@
584
  "model.layers.45.self_attn.q_a_proj",
585
  "model.layers.45.self_attn.q_b_proj",
586
  "model.layers.46.mlp.gate",
587
- "model.layers.46.mlp.shared_experts.down_proj",
588
- "model.layers.46.mlp.shared_experts.gate_proj",
589
- "model.layers.46.mlp.shared_experts.up_proj",
590
  "model.layers.46.self_attn.indexer.weights_proj",
591
  "model.layers.46.self_attn.indexer.wk",
592
  "model.layers.46.self_attn.indexer.wq_b",
@@ -596,9 +481,6 @@
596
  "model.layers.46.self_attn.q_a_proj",
597
  "model.layers.46.self_attn.q_b_proj",
598
  "model.layers.47.mlp.gate",
599
- "model.layers.47.mlp.shared_experts.down_proj",
600
- "model.layers.47.mlp.shared_experts.gate_proj",
601
- "model.layers.47.mlp.shared_experts.up_proj",
602
  "model.layers.47.self_attn.indexer.weights_proj",
603
  "model.layers.47.self_attn.indexer.wk",
604
  "model.layers.47.self_attn.indexer.wq_b",
@@ -608,9 +490,6 @@
608
  "model.layers.47.self_attn.q_a_proj",
609
  "model.layers.47.self_attn.q_b_proj",
610
  "model.layers.48.mlp.gate",
611
- "model.layers.48.mlp.shared_experts.down_proj",
612
- "model.layers.48.mlp.shared_experts.gate_proj",
613
- "model.layers.48.mlp.shared_experts.up_proj",
614
  "model.layers.48.self_attn.indexer.weights_proj",
615
  "model.layers.48.self_attn.indexer.wk",
616
  "model.layers.48.self_attn.indexer.wq_b",
@@ -620,9 +499,6 @@
620
  "model.layers.48.self_attn.q_a_proj",
621
  "model.layers.48.self_attn.q_b_proj",
622
  "model.layers.49.mlp.gate",
623
- "model.layers.49.mlp.shared_experts.down_proj",
624
- "model.layers.49.mlp.shared_experts.gate_proj",
625
- "model.layers.49.mlp.shared_experts.up_proj",
626
  "model.layers.49.self_attn.indexer.weights_proj",
627
  "model.layers.49.self_attn.indexer.wk",
628
  "model.layers.49.self_attn.indexer.wq_b",
@@ -632,9 +508,6 @@
632
  "model.layers.49.self_attn.q_a_proj",
633
  "model.layers.49.self_attn.q_b_proj",
634
  "model.layers.5.mlp.gate",
635
- "model.layers.5.mlp.shared_experts.down_proj",
636
- "model.layers.5.mlp.shared_experts.gate_proj",
637
- "model.layers.5.mlp.shared_experts.up_proj",
638
  "model.layers.5.self_attn.indexer.weights_proj",
639
  "model.layers.5.self_attn.indexer.wk",
640
  "model.layers.5.self_attn.indexer.wq_b",
@@ -644,9 +517,6 @@
644
  "model.layers.5.self_attn.q_a_proj",
645
  "model.layers.5.self_attn.q_b_proj",
646
  "model.layers.50.mlp.gate",
647
- "model.layers.50.mlp.shared_experts.down_proj",
648
- "model.layers.50.mlp.shared_experts.gate_proj",
649
- "model.layers.50.mlp.shared_experts.up_proj",
650
  "model.layers.50.self_attn.indexer.weights_proj",
651
  "model.layers.50.self_attn.indexer.wk",
652
  "model.layers.50.self_attn.indexer.wq_b",
@@ -656,9 +526,6 @@
656
  "model.layers.50.self_attn.q_a_proj",
657
  "model.layers.50.self_attn.q_b_proj",
658
  "model.layers.51.mlp.gate",
659
- "model.layers.51.mlp.shared_experts.down_proj",
660
- "model.layers.51.mlp.shared_experts.gate_proj",
661
- "model.layers.51.mlp.shared_experts.up_proj",
662
  "model.layers.51.self_attn.indexer.weights_proj",
663
  "model.layers.51.self_attn.indexer.wk",
664
  "model.layers.51.self_attn.indexer.wq_b",
@@ -668,9 +535,6 @@
668
  "model.layers.51.self_attn.q_a_proj",
669
  "model.layers.51.self_attn.q_b_proj",
670
  "model.layers.52.mlp.gate",
671
- "model.layers.52.mlp.shared_experts.down_proj",
672
- "model.layers.52.mlp.shared_experts.gate_proj",
673
- "model.layers.52.mlp.shared_experts.up_proj",
674
  "model.layers.52.self_attn.indexer.weights_proj",
675
  "model.layers.52.self_attn.indexer.wk",
676
  "model.layers.52.self_attn.indexer.wq_b",
@@ -680,9 +544,6 @@
680
  "model.layers.52.self_attn.q_a_proj",
681
  "model.layers.52.self_attn.q_b_proj",
682
  "model.layers.53.mlp.gate",
683
- "model.layers.53.mlp.shared_experts.down_proj",
684
- "model.layers.53.mlp.shared_experts.gate_proj",
685
- "model.layers.53.mlp.shared_experts.up_proj",
686
  "model.layers.53.self_attn.indexer.weights_proj",
687
  "model.layers.53.self_attn.indexer.wk",
688
  "model.layers.53.self_attn.indexer.wq_b",
@@ -692,9 +553,6 @@
692
  "model.layers.53.self_attn.q_a_proj",
693
  "model.layers.53.self_attn.q_b_proj",
694
  "model.layers.54.mlp.gate",
695
- "model.layers.54.mlp.shared_experts.down_proj",
696
- "model.layers.54.mlp.shared_experts.gate_proj",
697
- "model.layers.54.mlp.shared_experts.up_proj",
698
  "model.layers.54.self_attn.indexer.weights_proj",
699
  "model.layers.54.self_attn.indexer.wk",
700
  "model.layers.54.self_attn.indexer.wq_b",
@@ -704,9 +562,6 @@
704
  "model.layers.54.self_attn.q_a_proj",
705
  "model.layers.54.self_attn.q_b_proj",
706
  "model.layers.55.mlp.gate",
707
- "model.layers.55.mlp.shared_experts.down_proj",
708
- "model.layers.55.mlp.shared_experts.gate_proj",
709
- "model.layers.55.mlp.shared_experts.up_proj",
710
  "model.layers.55.self_attn.indexer.weights_proj",
711
  "model.layers.55.self_attn.indexer.wk",
712
  "model.layers.55.self_attn.indexer.wq_b",
@@ -716,9 +571,6 @@
716
  "model.layers.55.self_attn.q_a_proj",
717
  "model.layers.55.self_attn.q_b_proj",
718
  "model.layers.56.mlp.gate",
719
- "model.layers.56.mlp.shared_experts.down_proj",
720
- "model.layers.56.mlp.shared_experts.gate_proj",
721
- "model.layers.56.mlp.shared_experts.up_proj",
722
  "model.layers.56.self_attn.indexer.weights_proj",
723
  "model.layers.56.self_attn.indexer.wk",
724
  "model.layers.56.self_attn.indexer.wq_b",
@@ -728,9 +580,6 @@
728
  "model.layers.56.self_attn.q_a_proj",
729
  "model.layers.56.self_attn.q_b_proj",
730
  "model.layers.57.mlp.gate",
731
- "model.layers.57.mlp.shared_experts.down_proj",
732
- "model.layers.57.mlp.shared_experts.gate_proj",
733
- "model.layers.57.mlp.shared_experts.up_proj",
734
  "model.layers.57.self_attn.indexer.weights_proj",
735
  "model.layers.57.self_attn.indexer.wk",
736
  "model.layers.57.self_attn.indexer.wq_b",
@@ -740,9 +589,6 @@
740
  "model.layers.57.self_attn.q_a_proj",
741
  "model.layers.57.self_attn.q_b_proj",
742
  "model.layers.58.mlp.gate",
743
- "model.layers.58.mlp.shared_experts.down_proj",
744
- "model.layers.58.mlp.shared_experts.gate_proj",
745
- "model.layers.58.mlp.shared_experts.up_proj",
746
  "model.layers.58.self_attn.indexer.weights_proj",
747
  "model.layers.58.self_attn.indexer.wk",
748
  "model.layers.58.self_attn.indexer.wq_b",
@@ -752,9 +598,6 @@
752
  "model.layers.58.self_attn.q_a_proj",
753
  "model.layers.58.self_attn.q_b_proj",
754
  "model.layers.59.mlp.gate",
755
- "model.layers.59.mlp.shared_experts.down_proj",
756
- "model.layers.59.mlp.shared_experts.gate_proj",
757
- "model.layers.59.mlp.shared_experts.up_proj",
758
  "model.layers.59.self_attn.indexer.weights_proj",
759
  "model.layers.59.self_attn.indexer.wk",
760
  "model.layers.59.self_attn.indexer.wq_b",
@@ -764,9 +607,6 @@
764
  "model.layers.59.self_attn.q_a_proj",
765
  "model.layers.59.self_attn.q_b_proj",
766
  "model.layers.6.mlp.gate",
767
- "model.layers.6.mlp.shared_experts.down_proj",
768
- "model.layers.6.mlp.shared_experts.gate_proj",
769
- "model.layers.6.mlp.shared_experts.up_proj",
770
  "model.layers.6.self_attn.indexer.weights_proj",
771
  "model.layers.6.self_attn.indexer.wk",
772
  "model.layers.6.self_attn.indexer.wq_b",
@@ -776,9 +616,6 @@
776
  "model.layers.6.self_attn.q_a_proj",
777
  "model.layers.6.self_attn.q_b_proj",
778
  "model.layers.60.mlp.gate",
779
- "model.layers.60.mlp.shared_experts.down_proj",
780
- "model.layers.60.mlp.shared_experts.gate_proj",
781
- "model.layers.60.mlp.shared_experts.up_proj",
782
  "model.layers.60.self_attn.indexer.weights_proj",
783
  "model.layers.60.self_attn.indexer.wk",
784
  "model.layers.60.self_attn.indexer.wq_b",
@@ -788,9 +625,6 @@
788
  "model.layers.60.self_attn.q_a_proj",
789
  "model.layers.60.self_attn.q_b_proj",
790
  "model.layers.61.mlp.gate",
791
- "model.layers.61.mlp.shared_experts.down_proj",
792
- "model.layers.61.mlp.shared_experts.gate_proj",
793
- "model.layers.61.mlp.shared_experts.up_proj",
794
  "model.layers.61.self_attn.indexer.weights_proj",
795
  "model.layers.61.self_attn.indexer.wk",
796
  "model.layers.61.self_attn.indexer.wq_b",
@@ -800,9 +634,6 @@
800
  "model.layers.61.self_attn.q_a_proj",
801
  "model.layers.61.self_attn.q_b_proj",
802
  "model.layers.62.mlp.gate",
803
- "model.layers.62.mlp.shared_experts.down_proj",
804
- "model.layers.62.mlp.shared_experts.gate_proj",
805
- "model.layers.62.mlp.shared_experts.up_proj",
806
  "model.layers.62.self_attn.indexer.weights_proj",
807
  "model.layers.62.self_attn.indexer.wk",
808
  "model.layers.62.self_attn.indexer.wq_b",
@@ -812,9 +643,6 @@
812
  "model.layers.62.self_attn.q_a_proj",
813
  "model.layers.62.self_attn.q_b_proj",
814
  "model.layers.63.mlp.gate",
815
- "model.layers.63.mlp.shared_experts.down_proj",
816
- "model.layers.63.mlp.shared_experts.gate_proj",
817
- "model.layers.63.mlp.shared_experts.up_proj",
818
  "model.layers.63.self_attn.indexer.weights_proj",
819
  "model.layers.63.self_attn.indexer.wk",
820
  "model.layers.63.self_attn.indexer.wq_b",
@@ -824,9 +652,6 @@
824
  "model.layers.63.self_attn.q_a_proj",
825
  "model.layers.63.self_attn.q_b_proj",
826
  "model.layers.64.mlp.gate",
827
- "model.layers.64.mlp.shared_experts.down_proj",
828
- "model.layers.64.mlp.shared_experts.gate_proj",
829
- "model.layers.64.mlp.shared_experts.up_proj",
830
  "model.layers.64.self_attn.indexer.weights_proj",
831
  "model.layers.64.self_attn.indexer.wk",
832
  "model.layers.64.self_attn.indexer.wq_b",
@@ -836,9 +661,6 @@
836
  "model.layers.64.self_attn.q_a_proj",
837
  "model.layers.64.self_attn.q_b_proj",
838
  "model.layers.65.mlp.gate",
839
- "model.layers.65.mlp.shared_experts.down_proj",
840
- "model.layers.65.mlp.shared_experts.gate_proj",
841
- "model.layers.65.mlp.shared_experts.up_proj",
842
  "model.layers.65.self_attn.indexer.weights_proj",
843
  "model.layers.65.self_attn.indexer.wk",
844
  "model.layers.65.self_attn.indexer.wq_b",
@@ -848,9 +670,6 @@
848
  "model.layers.65.self_attn.q_a_proj",
849
  "model.layers.65.self_attn.q_b_proj",
850
  "model.layers.66.mlp.gate",
851
- "model.layers.66.mlp.shared_experts.down_proj",
852
- "model.layers.66.mlp.shared_experts.gate_proj",
853
- "model.layers.66.mlp.shared_experts.up_proj",
854
  "model.layers.66.self_attn.indexer.weights_proj",
855
  "model.layers.66.self_attn.indexer.wk",
856
  "model.layers.66.self_attn.indexer.wq_b",
@@ -860,9 +679,6 @@
860
  "model.layers.66.self_attn.q_a_proj",
861
  "model.layers.66.self_attn.q_b_proj",
862
  "model.layers.67.mlp.gate",
863
- "model.layers.67.mlp.shared_experts.down_proj",
864
- "model.layers.67.mlp.shared_experts.gate_proj",
865
- "model.layers.67.mlp.shared_experts.up_proj",
866
  "model.layers.67.self_attn.indexer.weights_proj",
867
  "model.layers.67.self_attn.indexer.wk",
868
  "model.layers.67.self_attn.indexer.wq_b",
@@ -872,9 +688,6 @@
872
  "model.layers.67.self_attn.q_a_proj",
873
  "model.layers.67.self_attn.q_b_proj",
874
  "model.layers.68.mlp.gate",
875
- "model.layers.68.mlp.shared_experts.down_proj",
876
- "model.layers.68.mlp.shared_experts.gate_proj",
877
- "model.layers.68.mlp.shared_experts.up_proj",
878
  "model.layers.68.self_attn.indexer.weights_proj",
879
  "model.layers.68.self_attn.indexer.wk",
880
  "model.layers.68.self_attn.indexer.wq_b",
@@ -884,9 +697,6 @@
884
  "model.layers.68.self_attn.q_a_proj",
885
  "model.layers.68.self_attn.q_b_proj",
886
  "model.layers.69.mlp.gate",
887
- "model.layers.69.mlp.shared_experts.down_proj",
888
- "model.layers.69.mlp.shared_experts.gate_proj",
889
- "model.layers.69.mlp.shared_experts.up_proj",
890
  "model.layers.69.self_attn.indexer.weights_proj",
891
  "model.layers.69.self_attn.indexer.wk",
892
  "model.layers.69.self_attn.indexer.wq_b",
@@ -896,9 +706,6 @@
896
  "model.layers.69.self_attn.q_a_proj",
897
  "model.layers.69.self_attn.q_b_proj",
898
  "model.layers.7.mlp.gate",
899
- "model.layers.7.mlp.shared_experts.down_proj",
900
- "model.layers.7.mlp.shared_experts.gate_proj",
901
- "model.layers.7.mlp.shared_experts.up_proj",
902
  "model.layers.7.self_attn.indexer.weights_proj",
903
  "model.layers.7.self_attn.indexer.wk",
904
  "model.layers.7.self_attn.indexer.wq_b",
@@ -908,9 +715,6 @@
908
  "model.layers.7.self_attn.q_a_proj",
909
  "model.layers.7.self_attn.q_b_proj",
910
  "model.layers.70.mlp.gate",
911
- "model.layers.70.mlp.shared_experts.down_proj",
912
- "model.layers.70.mlp.shared_experts.gate_proj",
913
- "model.layers.70.mlp.shared_experts.up_proj",
914
  "model.layers.70.self_attn.indexer.weights_proj",
915
  "model.layers.70.self_attn.indexer.wk",
916
  "model.layers.70.self_attn.indexer.wq_b",
@@ -920,9 +724,6 @@
920
  "model.layers.70.self_attn.q_a_proj",
921
  "model.layers.70.self_attn.q_b_proj",
922
  "model.layers.71.mlp.gate",
923
- "model.layers.71.mlp.shared_experts.down_proj",
924
- "model.layers.71.mlp.shared_experts.gate_proj",
925
- "model.layers.71.mlp.shared_experts.up_proj",
926
  "model.layers.71.self_attn.indexer.weights_proj",
927
  "model.layers.71.self_attn.indexer.wk",
928
  "model.layers.71.self_attn.indexer.wq_b",
@@ -932,9 +733,6 @@
932
  "model.layers.71.self_attn.q_a_proj",
933
  "model.layers.71.self_attn.q_b_proj",
934
  "model.layers.72.mlp.gate",
935
- "model.layers.72.mlp.shared_experts.down_proj",
936
- "model.layers.72.mlp.shared_experts.gate_proj",
937
- "model.layers.72.mlp.shared_experts.up_proj",
938
  "model.layers.72.self_attn.indexer.weights_proj",
939
  "model.layers.72.self_attn.indexer.wk",
940
  "model.layers.72.self_attn.indexer.wq_b",
@@ -944,9 +742,6 @@
944
  "model.layers.72.self_attn.q_a_proj",
945
  "model.layers.72.self_attn.q_b_proj",
946
  "model.layers.73.mlp.gate",
947
- "model.layers.73.mlp.shared_experts.down_proj",
948
- "model.layers.73.mlp.shared_experts.gate_proj",
949
- "model.layers.73.mlp.shared_experts.up_proj",
950
  "model.layers.73.self_attn.indexer.weights_proj",
951
  "model.layers.73.self_attn.indexer.wk",
952
  "model.layers.73.self_attn.indexer.wq_b",
@@ -956,9 +751,6 @@
956
  "model.layers.73.self_attn.q_a_proj",
957
  "model.layers.73.self_attn.q_b_proj",
958
  "model.layers.74.mlp.gate",
959
- "model.layers.74.mlp.shared_experts.down_proj",
960
- "model.layers.74.mlp.shared_experts.gate_proj",
961
- "model.layers.74.mlp.shared_experts.up_proj",
962
  "model.layers.74.self_attn.indexer.weights_proj",
963
  "model.layers.74.self_attn.indexer.wk",
964
  "model.layers.74.self_attn.indexer.wq_b",
@@ -968,9 +760,6 @@
968
  "model.layers.74.self_attn.q_a_proj",
969
  "model.layers.74.self_attn.q_b_proj",
970
  "model.layers.75.mlp.gate",
971
- "model.layers.75.mlp.shared_experts.down_proj",
972
- "model.layers.75.mlp.shared_experts.gate_proj",
973
- "model.layers.75.mlp.shared_experts.up_proj",
974
  "model.layers.75.self_attn.indexer.weights_proj",
975
  "model.layers.75.self_attn.indexer.wk",
976
  "model.layers.75.self_attn.indexer.wq_b",
@@ -980,9 +769,6 @@
980
  "model.layers.75.self_attn.q_a_proj",
981
  "model.layers.75.self_attn.q_b_proj",
982
  "model.layers.76.mlp.gate",
983
- "model.layers.76.mlp.shared_experts.down_proj",
984
- "model.layers.76.mlp.shared_experts.gate_proj",
985
- "model.layers.76.mlp.shared_experts.up_proj",
986
  "model.layers.76.self_attn.indexer.weights_proj",
987
  "model.layers.76.self_attn.indexer.wk",
988
  "model.layers.76.self_attn.indexer.wq_b",
@@ -992,9 +778,6 @@
992
  "model.layers.76.self_attn.q_a_proj",
993
  "model.layers.76.self_attn.q_b_proj",
994
  "model.layers.77.mlp.gate",
995
- "model.layers.77.mlp.shared_experts.down_proj",
996
- "model.layers.77.mlp.shared_experts.gate_proj",
997
- "model.layers.77.mlp.shared_experts.up_proj",
998
  "model.layers.77.self_attn.indexer.weights_proj",
999
  "model.layers.77.self_attn.indexer.wk",
1000
  "model.layers.77.self_attn.indexer.wq_b",
@@ -1004,9 +787,6 @@
1004
  "model.layers.77.self_attn.q_a_proj",
1005
  "model.layers.77.self_attn.q_b_proj",
1006
  "model.layers.78.mlp.gate",
1007
- "model.layers.78.mlp.shared_experts.down_proj",
1008
- "model.layers.78.mlp.shared_experts.gate_proj",
1009
- "model.layers.78.mlp.shared_experts.up_proj",
1010
  "model.layers.78.self_attn.indexer.weights_proj",
1011
  "model.layers.78.self_attn.indexer.wk",
1012
  "model.layers.78.self_attn.indexer.wq_b",
@@ -1016,9 +796,6 @@
1016
  "model.layers.78.self_attn.q_a_proj",
1017
  "model.layers.78.self_attn.q_b_proj",
1018
  "model.layers.8.mlp.gate",
1019
- "model.layers.8.mlp.shared_experts.down_proj",
1020
- "model.layers.8.mlp.shared_experts.gate_proj",
1021
- "model.layers.8.mlp.shared_experts.up_proj",
1022
  "model.layers.8.self_attn.indexer.weights_proj",
1023
  "model.layers.8.self_attn.indexer.wk",
1024
  "model.layers.8.self_attn.indexer.wq_b",
@@ -1028,9 +805,6 @@
1028
  "model.layers.8.self_attn.q_a_proj",
1029
  "model.layers.8.self_attn.q_b_proj",
1030
  "model.layers.9.mlp.gate",
1031
- "model.layers.9.mlp.shared_experts.down_proj",
1032
- "model.layers.9.mlp.shared_experts.gate_proj",
1033
- "model.layers.9.mlp.shared_experts.up_proj",
1034
  "model.layers.9.self_attn.indexer.weights_proj",
1035
  "model.layers.9.self_attn.indexer.wk",
1036
  "model.layers.9.self_attn.indexer.wq_b",
@@ -1048,7 +822,7 @@
1048
  "kv_cache_quant_config": {},
1049
  "kv_cache_post_rope": false,
1050
  "quant_mode": "eager_mode",
1051
- "version": "0.12+6ff6457c80",
1052
  "export": {
1053
  "kv_cache_group": [],
1054
  "min_kv_scale": 0.0,
 
64
  "qscheme": "per_group",
65
  "ch_axis": -1,
66
  "group_size": 32,
67
+ "block_size": null,
68
  "symmetric": null,
69
  "round_method": "half_even",
70
  "scale_type": "float",
 
81
  "qscheme": "per_group",
82
  "ch_axis": -1,
83
  "group_size": 32,
84
+ "block_size": null,
85
  "symmetric": null,
86
  "round_method": "half_even",
87
  "scale_type": "float",
 
119
  "model.layers.1.self_attn.q_a_proj",
120
  "model.layers.1.self_attn.q_b_proj",
121
  "model.layers.10.mlp.gate",
 
 
 
122
  "model.layers.10.self_attn.indexer.weights_proj",
123
  "model.layers.10.self_attn.indexer.wk",
124
  "model.layers.10.self_attn.indexer.wq_b",
 
128
  "model.layers.10.self_attn.q_a_proj",
129
  "model.layers.10.self_attn.q_b_proj",
130
  "model.layers.11.mlp.gate",
 
 
 
131
  "model.layers.11.self_attn.indexer.weights_proj",
132
  "model.layers.11.self_attn.indexer.wk",
133
  "model.layers.11.self_attn.indexer.wq_b",
 
137
  "model.layers.11.self_attn.q_a_proj",
138
  "model.layers.11.self_attn.q_b_proj",
139
  "model.layers.12.mlp.gate",
 
 
 
140
  "model.layers.12.self_attn.indexer.weights_proj",
141
  "model.layers.12.self_attn.indexer.wk",
142
  "model.layers.12.self_attn.indexer.wq_b",
 
146
  "model.layers.12.self_attn.q_a_proj",
147
  "model.layers.12.self_attn.q_b_proj",
148
  "model.layers.13.mlp.gate",
 
 
 
149
  "model.layers.13.self_attn.indexer.weights_proj",
150
  "model.layers.13.self_attn.indexer.wk",
151
  "model.layers.13.self_attn.indexer.wq_b",
 
155
  "model.layers.13.self_attn.q_a_proj",
156
  "model.layers.13.self_attn.q_b_proj",
157
  "model.layers.14.mlp.gate",
 
 
 
158
  "model.layers.14.self_attn.indexer.weights_proj",
159
  "model.layers.14.self_attn.indexer.wk",
160
  "model.layers.14.self_attn.indexer.wq_b",
 
164
  "model.layers.14.self_attn.q_a_proj",
165
  "model.layers.14.self_attn.q_b_proj",
166
  "model.layers.15.mlp.gate",
 
 
 
167
  "model.layers.15.self_attn.indexer.weights_proj",
168
  "model.layers.15.self_attn.indexer.wk",
169
  "model.layers.15.self_attn.indexer.wq_b",
 
173
  "model.layers.15.self_attn.q_a_proj",
174
  "model.layers.15.self_attn.q_b_proj",
175
  "model.layers.16.mlp.gate",
 
 
 
176
  "model.layers.16.self_attn.indexer.weights_proj",
177
  "model.layers.16.self_attn.indexer.wk",
178
  "model.layers.16.self_attn.indexer.wq_b",
 
182
  "model.layers.16.self_attn.q_a_proj",
183
  "model.layers.16.self_attn.q_b_proj",
184
  "model.layers.17.mlp.gate",
 
 
 
185
  "model.layers.17.self_attn.indexer.weights_proj",
186
  "model.layers.17.self_attn.indexer.wk",
187
  "model.layers.17.self_attn.indexer.wq_b",
 
191
  "model.layers.17.self_attn.q_a_proj",
192
  "model.layers.17.self_attn.q_b_proj",
193
  "model.layers.18.mlp.gate",
 
 
 
194
  "model.layers.18.self_attn.indexer.weights_proj",
195
  "model.layers.18.self_attn.indexer.wk",
196
  "model.layers.18.self_attn.indexer.wq_b",
 
200
  "model.layers.18.self_attn.q_a_proj",
201
  "model.layers.18.self_attn.q_b_proj",
202
  "model.layers.19.mlp.gate",
 
 
 
203
  "model.layers.19.self_attn.indexer.weights_proj",
204
  "model.layers.19.self_attn.indexer.wk",
205
  "model.layers.19.self_attn.indexer.wq_b",
 
220
  "model.layers.2.self_attn.q_a_proj",
221
  "model.layers.2.self_attn.q_b_proj",
222
  "model.layers.20.mlp.gate",
 
 
 
223
  "model.layers.20.self_attn.indexer.weights_proj",
224
  "model.layers.20.self_attn.indexer.wk",
225
  "model.layers.20.self_attn.indexer.wq_b",
 
229
  "model.layers.20.self_attn.q_a_proj",
230
  "model.layers.20.self_attn.q_b_proj",
231
  "model.layers.21.mlp.gate",
 
 
 
232
  "model.layers.21.self_attn.indexer.weights_proj",
233
  "model.layers.21.self_attn.indexer.wk",
234
  "model.layers.21.self_attn.indexer.wq_b",
 
238
  "model.layers.21.self_attn.q_a_proj",
239
  "model.layers.21.self_attn.q_b_proj",
240
  "model.layers.22.mlp.gate",
 
 
 
241
  "model.layers.22.self_attn.indexer.weights_proj",
242
  "model.layers.22.self_attn.indexer.wk",
243
  "model.layers.22.self_attn.indexer.wq_b",
 
247
  "model.layers.22.self_attn.q_a_proj",
248
  "model.layers.22.self_attn.q_b_proj",
249
  "model.layers.23.mlp.gate",
 
 
 
250
  "model.layers.23.self_attn.indexer.weights_proj",
251
  "model.layers.23.self_attn.indexer.wk",
252
  "model.layers.23.self_attn.indexer.wq_b",
 
256
  "model.layers.23.self_attn.q_a_proj",
257
  "model.layers.23.self_attn.q_b_proj",
258
  "model.layers.24.mlp.gate",
 
 
 
259
  "model.layers.24.self_attn.indexer.weights_proj",
260
  "model.layers.24.self_attn.indexer.wk",
261
  "model.layers.24.self_attn.indexer.wq_b",
 
265
  "model.layers.24.self_attn.q_a_proj",
266
  "model.layers.24.self_attn.q_b_proj",
267
  "model.layers.25.mlp.gate",
 
 
 
268
  "model.layers.25.self_attn.indexer.weights_proj",
269
  "model.layers.25.self_attn.indexer.wk",
270
  "model.layers.25.self_attn.indexer.wq_b",
 
274
  "model.layers.25.self_attn.q_a_proj",
275
  "model.layers.25.self_attn.q_b_proj",
276
  "model.layers.26.mlp.gate",
 
 
 
277
  "model.layers.26.self_attn.indexer.weights_proj",
278
  "model.layers.26.self_attn.indexer.wk",
279
  "model.layers.26.self_attn.indexer.wq_b",
 
283
  "model.layers.26.self_attn.q_a_proj",
284
  "model.layers.26.self_attn.q_b_proj",
285
  "model.layers.27.mlp.gate",
 
 
 
286
  "model.layers.27.self_attn.indexer.weights_proj",
287
  "model.layers.27.self_attn.indexer.wk",
288
  "model.layers.27.self_attn.indexer.wq_b",
 
292
  "model.layers.27.self_attn.q_a_proj",
293
  "model.layers.27.self_attn.q_b_proj",
294
  "model.layers.28.mlp.gate",
 
 
 
295
  "model.layers.28.self_attn.indexer.weights_proj",
296
  "model.layers.28.self_attn.indexer.wk",
297
  "model.layers.28.self_attn.indexer.wq_b",
 
301
  "model.layers.28.self_attn.q_a_proj",
302
  "model.layers.28.self_attn.q_b_proj",
303
  "model.layers.29.mlp.gate",
 
 
 
304
  "model.layers.29.self_attn.indexer.weights_proj",
305
  "model.layers.29.self_attn.indexer.wk",
306
  "model.layers.29.self_attn.indexer.wq_b",
 
310
  "model.layers.29.self_attn.q_a_proj",
311
  "model.layers.29.self_attn.q_b_proj",
312
  "model.layers.3.mlp.gate",
 
 
 
313
  "model.layers.3.self_attn.indexer.weights_proj",
314
  "model.layers.3.self_attn.indexer.wk",
315
  "model.layers.3.self_attn.indexer.wq_b",
 
319
  "model.layers.3.self_attn.q_a_proj",
320
  "model.layers.3.self_attn.q_b_proj",
321
  "model.layers.30.mlp.gate",
 
 
 
322
  "model.layers.30.self_attn.indexer.weights_proj",
323
  "model.layers.30.self_attn.indexer.wk",
324
  "model.layers.30.self_attn.indexer.wq_b",
 
328
  "model.layers.30.self_attn.q_a_proj",
329
  "model.layers.30.self_attn.q_b_proj",
330
  "model.layers.31.mlp.gate",
 
 
 
331
  "model.layers.31.self_attn.indexer.weights_proj",
332
  "model.layers.31.self_attn.indexer.wk",
333
  "model.layers.31.self_attn.indexer.wq_b",
 
337
  "model.layers.31.self_attn.q_a_proj",
338
  "model.layers.31.self_attn.q_b_proj",
339
  "model.layers.32.mlp.gate",
 
 
 
340
  "model.layers.32.self_attn.indexer.weights_proj",
341
  "model.layers.32.self_attn.indexer.wk",
342
  "model.layers.32.self_attn.indexer.wq_b",
 
346
  "model.layers.32.self_attn.q_a_proj",
347
  "model.layers.32.self_attn.q_b_proj",
348
  "model.layers.33.mlp.gate",
 
 
 
349
  "model.layers.33.self_attn.indexer.weights_proj",
350
  "model.layers.33.self_attn.indexer.wk",
351
  "model.layers.33.self_attn.indexer.wq_b",
 
355
  "model.layers.33.self_attn.q_a_proj",
356
  "model.layers.33.self_attn.q_b_proj",
357
  "model.layers.34.mlp.gate",
 
 
 
358
  "model.layers.34.self_attn.indexer.weights_proj",
359
  "model.layers.34.self_attn.indexer.wk",
360
  "model.layers.34.self_attn.indexer.wq_b",
 
364
  "model.layers.34.self_attn.q_a_proj",
365
  "model.layers.34.self_attn.q_b_proj",
366
  "model.layers.35.mlp.gate",
 
 
 
367
  "model.layers.35.self_attn.indexer.weights_proj",
368
  "model.layers.35.self_attn.indexer.wk",
369
  "model.layers.35.self_attn.indexer.wq_b",
 
373
  "model.layers.35.self_attn.q_a_proj",
374
  "model.layers.35.self_attn.q_b_proj",
375
  "model.layers.36.mlp.gate",
 
 
 
376
  "model.layers.36.self_attn.indexer.weights_proj",
377
  "model.layers.36.self_attn.indexer.wk",
378
  "model.layers.36.self_attn.indexer.wq_b",
 
382
  "model.layers.36.self_attn.q_a_proj",
383
  "model.layers.36.self_attn.q_b_proj",
384
  "model.layers.37.mlp.gate",
 
 
 
385
  "model.layers.37.self_attn.indexer.weights_proj",
386
  "model.layers.37.self_attn.indexer.wk",
387
  "model.layers.37.self_attn.indexer.wq_b",
 
391
  "model.layers.37.self_attn.q_a_proj",
392
  "model.layers.37.self_attn.q_b_proj",
393
  "model.layers.38.mlp.gate",
 
 
 
394
  "model.layers.38.self_attn.indexer.weights_proj",
395
  "model.layers.38.self_attn.indexer.wk",
396
  "model.layers.38.self_attn.indexer.wq_b",
 
400
  "model.layers.38.self_attn.q_a_proj",
401
  "model.layers.38.self_attn.q_b_proj",
402
  "model.layers.39.mlp.gate",
 
 
 
403
  "model.layers.39.self_attn.indexer.weights_proj",
404
  "model.layers.39.self_attn.indexer.wk",
405
  "model.layers.39.self_attn.indexer.wq_b",
 
409
  "model.layers.39.self_attn.q_a_proj",
410
  "model.layers.39.self_attn.q_b_proj",
411
  "model.layers.4.mlp.gate",
 
 
 
412
  "model.layers.4.self_attn.indexer.weights_proj",
413
  "model.layers.4.self_attn.indexer.wk",
414
  "model.layers.4.self_attn.indexer.wq_b",
 
418
  "model.layers.4.self_attn.q_a_proj",
419
  "model.layers.4.self_attn.q_b_proj",
420
  "model.layers.40.mlp.gate",
 
 
 
421
  "model.layers.40.self_attn.indexer.weights_proj",
422
  "model.layers.40.self_attn.indexer.wk",
423
  "model.layers.40.self_attn.indexer.wq_b",
 
427
  "model.layers.40.self_attn.q_a_proj",
428
  "model.layers.40.self_attn.q_b_proj",
429
  "model.layers.41.mlp.gate",
 
 
 
430
  "model.layers.41.self_attn.indexer.weights_proj",
431
  "model.layers.41.self_attn.indexer.wk",
432
  "model.layers.41.self_attn.indexer.wq_b",
 
436
  "model.layers.41.self_attn.q_a_proj",
437
  "model.layers.41.self_attn.q_b_proj",
438
  "model.layers.42.mlp.gate",
 
 
 
439
  "model.layers.42.self_attn.indexer.weights_proj",
440
  "model.layers.42.self_attn.indexer.wk",
441
  "model.layers.42.self_attn.indexer.wq_b",
 
445
  "model.layers.42.self_attn.q_a_proj",
446
  "model.layers.42.self_attn.q_b_proj",
447
  "model.layers.43.mlp.gate",
 
 
 
448
  "model.layers.43.self_attn.indexer.weights_proj",
449
  "model.layers.43.self_attn.indexer.wk",
450
  "model.layers.43.self_attn.indexer.wq_b",
 
454
  "model.layers.43.self_attn.q_a_proj",
455
  "model.layers.43.self_attn.q_b_proj",
456
  "model.layers.44.mlp.gate",
 
 
 
457
  "model.layers.44.self_attn.indexer.weights_proj",
458
  "model.layers.44.self_attn.indexer.wk",
459
  "model.layers.44.self_attn.indexer.wq_b",
 
463
  "model.layers.44.self_attn.q_a_proj",
464
  "model.layers.44.self_attn.q_b_proj",
465
  "model.layers.45.mlp.gate",
 
 
 
466
  "model.layers.45.self_attn.indexer.weights_proj",
467
  "model.layers.45.self_attn.indexer.wk",
468
  "model.layers.45.self_attn.indexer.wq_b",
 
472
  "model.layers.45.self_attn.q_a_proj",
473
  "model.layers.45.self_attn.q_b_proj",
474
  "model.layers.46.mlp.gate",
 
 
 
475
  "model.layers.46.self_attn.indexer.weights_proj",
476
  "model.layers.46.self_attn.indexer.wk",
477
  "model.layers.46.self_attn.indexer.wq_b",
 
481
  "model.layers.46.self_attn.q_a_proj",
482
  "model.layers.46.self_attn.q_b_proj",
483
  "model.layers.47.mlp.gate",
 
 
 
484
  "model.layers.47.self_attn.indexer.weights_proj",
485
  "model.layers.47.self_attn.indexer.wk",
486
  "model.layers.47.self_attn.indexer.wq_b",
 
490
  "model.layers.47.self_attn.q_a_proj",
491
  "model.layers.47.self_attn.q_b_proj",
492
  "model.layers.48.mlp.gate",
 
 
 
493
  "model.layers.48.self_attn.indexer.weights_proj",
494
  "model.layers.48.self_attn.indexer.wk",
495
  "model.layers.48.self_attn.indexer.wq_b",
 
499
  "model.layers.48.self_attn.q_a_proj",
500
  "model.layers.48.self_attn.q_b_proj",
501
  "model.layers.49.mlp.gate",
 
 
 
502
  "model.layers.49.self_attn.indexer.weights_proj",
503
  "model.layers.49.self_attn.indexer.wk",
504
  "model.layers.49.self_attn.indexer.wq_b",
 
508
  "model.layers.49.self_attn.q_a_proj",
509
  "model.layers.49.self_attn.q_b_proj",
510
  "model.layers.5.mlp.gate",
 
 
 
511
  "model.layers.5.self_attn.indexer.weights_proj",
512
  "model.layers.5.self_attn.indexer.wk",
513
  "model.layers.5.self_attn.indexer.wq_b",
 
517
  "model.layers.5.self_attn.q_a_proj",
518
  "model.layers.5.self_attn.q_b_proj",
519
  "model.layers.50.mlp.gate",
 
 
 
520
  "model.layers.50.self_attn.indexer.weights_proj",
521
  "model.layers.50.self_attn.indexer.wk",
522
  "model.layers.50.self_attn.indexer.wq_b",
 
526
  "model.layers.50.self_attn.q_a_proj",
527
  "model.layers.50.self_attn.q_b_proj",
528
  "model.layers.51.mlp.gate",
 
 
 
529
  "model.layers.51.self_attn.indexer.weights_proj",
530
  "model.layers.51.self_attn.indexer.wk",
531
  "model.layers.51.self_attn.indexer.wq_b",
 
535
  "model.layers.51.self_attn.q_a_proj",
536
  "model.layers.51.self_attn.q_b_proj",
537
  "model.layers.52.mlp.gate",
 
 
 
538
  "model.layers.52.self_attn.indexer.weights_proj",
539
  "model.layers.52.self_attn.indexer.wk",
540
  "model.layers.52.self_attn.indexer.wq_b",
 
544
  "model.layers.52.self_attn.q_a_proj",
545
  "model.layers.52.self_attn.q_b_proj",
546
  "model.layers.53.mlp.gate",
 
 
 
547
  "model.layers.53.self_attn.indexer.weights_proj",
548
  "model.layers.53.self_attn.indexer.wk",
549
  "model.layers.53.self_attn.indexer.wq_b",
 
553
  "model.layers.53.self_attn.q_a_proj",
554
  "model.layers.53.self_attn.q_b_proj",
555
  "model.layers.54.mlp.gate",
 
 
 
556
  "model.layers.54.self_attn.indexer.weights_proj",
557
  "model.layers.54.self_attn.indexer.wk",
558
  "model.layers.54.self_attn.indexer.wq_b",
 
562
  "model.layers.54.self_attn.q_a_proj",
563
  "model.layers.54.self_attn.q_b_proj",
564
  "model.layers.55.mlp.gate",
 
 
 
565
  "model.layers.55.self_attn.indexer.weights_proj",
566
  "model.layers.55.self_attn.indexer.wk",
567
  "model.layers.55.self_attn.indexer.wq_b",
 
571
  "model.layers.55.self_attn.q_a_proj",
572
  "model.layers.55.self_attn.q_b_proj",
573
  "model.layers.56.mlp.gate",
 
 
 
574
  "model.layers.56.self_attn.indexer.weights_proj",
575
  "model.layers.56.self_attn.indexer.wk",
576
  "model.layers.56.self_attn.indexer.wq_b",
 
580
  "model.layers.56.self_attn.q_a_proj",
581
  "model.layers.56.self_attn.q_b_proj",
582
  "model.layers.57.mlp.gate",
 
 
 
583
  "model.layers.57.self_attn.indexer.weights_proj",
584
  "model.layers.57.self_attn.indexer.wk",
585
  "model.layers.57.self_attn.indexer.wq_b",
 
589
  "model.layers.57.self_attn.q_a_proj",
590
  "model.layers.57.self_attn.q_b_proj",
591
  "model.layers.58.mlp.gate",
 
 
 
592
  "model.layers.58.self_attn.indexer.weights_proj",
593
  "model.layers.58.self_attn.indexer.wk",
594
  "model.layers.58.self_attn.indexer.wq_b",
 
598
  "model.layers.58.self_attn.q_a_proj",
599
  "model.layers.58.self_attn.q_b_proj",
600
  "model.layers.59.mlp.gate",
 
 
 
601
  "model.layers.59.self_attn.indexer.weights_proj",
602
  "model.layers.59.self_attn.indexer.wk",
603
  "model.layers.59.self_attn.indexer.wq_b",
 
607
  "model.layers.59.self_attn.q_a_proj",
608
  "model.layers.59.self_attn.q_b_proj",
609
  "model.layers.6.mlp.gate",
 
 
 
610
  "model.layers.6.self_attn.indexer.weights_proj",
611
  "model.layers.6.self_attn.indexer.wk",
612
  "model.layers.6.self_attn.indexer.wq_b",
 
616
  "model.layers.6.self_attn.q_a_proj",
617
  "model.layers.6.self_attn.q_b_proj",
618
  "model.layers.60.mlp.gate",
 
 
 
619
  "model.layers.60.self_attn.indexer.weights_proj",
620
  "model.layers.60.self_attn.indexer.wk",
621
  "model.layers.60.self_attn.indexer.wq_b",
 
625
  "model.layers.60.self_attn.q_a_proj",
626
  "model.layers.60.self_attn.q_b_proj",
627
  "model.layers.61.mlp.gate",
 
 
 
628
  "model.layers.61.self_attn.indexer.weights_proj",
629
  "model.layers.61.self_attn.indexer.wk",
630
  "model.layers.61.self_attn.indexer.wq_b",
 
634
  "model.layers.61.self_attn.q_a_proj",
635
  "model.layers.61.self_attn.q_b_proj",
636
  "model.layers.62.mlp.gate",
 
 
 
637
  "model.layers.62.self_attn.indexer.weights_proj",
638
  "model.layers.62.self_attn.indexer.wk",
639
  "model.layers.62.self_attn.indexer.wq_b",
 
643
  "model.layers.62.self_attn.q_a_proj",
644
  "model.layers.62.self_attn.q_b_proj",
645
  "model.layers.63.mlp.gate",
 
 
 
646
  "model.layers.63.self_attn.indexer.weights_proj",
647
  "model.layers.63.self_attn.indexer.wk",
648
  "model.layers.63.self_attn.indexer.wq_b",
 
652
  "model.layers.63.self_attn.q_a_proj",
653
  "model.layers.63.self_attn.q_b_proj",
654
  "model.layers.64.mlp.gate",
 
 
 
655
  "model.layers.64.self_attn.indexer.weights_proj",
656
  "model.layers.64.self_attn.indexer.wk",
657
  "model.layers.64.self_attn.indexer.wq_b",
 
661
  "model.layers.64.self_attn.q_a_proj",
662
  "model.layers.64.self_attn.q_b_proj",
663
  "model.layers.65.mlp.gate",
 
 
 
664
  "model.layers.65.self_attn.indexer.weights_proj",
665
  "model.layers.65.self_attn.indexer.wk",
666
  "model.layers.65.self_attn.indexer.wq_b",
 
670
  "model.layers.65.self_attn.q_a_proj",
671
  "model.layers.65.self_attn.q_b_proj",
672
  "model.layers.66.mlp.gate",
 
 
 
673
  "model.layers.66.self_attn.indexer.weights_proj",
674
  "model.layers.66.self_attn.indexer.wk",
675
  "model.layers.66.self_attn.indexer.wq_b",
 
679
  "model.layers.66.self_attn.q_a_proj",
680
  "model.layers.66.self_attn.q_b_proj",
681
  "model.layers.67.mlp.gate",
 
 
 
682
  "model.layers.67.self_attn.indexer.weights_proj",
683
  "model.layers.67.self_attn.indexer.wk",
684
  "model.layers.67.self_attn.indexer.wq_b",
 
688
  "model.layers.67.self_attn.q_a_proj",
689
  "model.layers.67.self_attn.q_b_proj",
690
  "model.layers.68.mlp.gate",
 
 
 
691
  "model.layers.68.self_attn.indexer.weights_proj",
692
  "model.layers.68.self_attn.indexer.wk",
693
  "model.layers.68.self_attn.indexer.wq_b",
 
697
  "model.layers.68.self_attn.q_a_proj",
698
  "model.layers.68.self_attn.q_b_proj",
699
  "model.layers.69.mlp.gate",
 
 
 
700
  "model.layers.69.self_attn.indexer.weights_proj",
701
  "model.layers.69.self_attn.indexer.wk",
702
  "model.layers.69.self_attn.indexer.wq_b",
 
706
  "model.layers.69.self_attn.q_a_proj",
707
  "model.layers.69.self_attn.q_b_proj",
708
  "model.layers.7.mlp.gate",
 
 
 
709
  "model.layers.7.self_attn.indexer.weights_proj",
710
  "model.layers.7.self_attn.indexer.wk",
711
  "model.layers.7.self_attn.indexer.wq_b",
 
715
  "model.layers.7.self_attn.q_a_proj",
716
  "model.layers.7.self_attn.q_b_proj",
717
  "model.layers.70.mlp.gate",
 
 
 
718
  "model.layers.70.self_attn.indexer.weights_proj",
719
  "model.layers.70.self_attn.indexer.wk",
720
  "model.layers.70.self_attn.indexer.wq_b",
 
724
  "model.layers.70.self_attn.q_a_proj",
725
  "model.layers.70.self_attn.q_b_proj",
726
  "model.layers.71.mlp.gate",
 
 
 
727
  "model.layers.71.self_attn.indexer.weights_proj",
728
  "model.layers.71.self_attn.indexer.wk",
729
  "model.layers.71.self_attn.indexer.wq_b",
 
733
  "model.layers.71.self_attn.q_a_proj",
734
  "model.layers.71.self_attn.q_b_proj",
735
  "model.layers.72.mlp.gate",
 
 
 
736
  "model.layers.72.self_attn.indexer.weights_proj",
737
  "model.layers.72.self_attn.indexer.wk",
738
  "model.layers.72.self_attn.indexer.wq_b",
 
742
  "model.layers.72.self_attn.q_a_proj",
743
  "model.layers.72.self_attn.q_b_proj",
744
  "model.layers.73.mlp.gate",
 
 
 
745
  "model.layers.73.self_attn.indexer.weights_proj",
746
  "model.layers.73.self_attn.indexer.wk",
747
  "model.layers.73.self_attn.indexer.wq_b",
 
751
  "model.layers.73.self_attn.q_a_proj",
752
  "model.layers.73.self_attn.q_b_proj",
753
  "model.layers.74.mlp.gate",
 
 
 
754
  "model.layers.74.self_attn.indexer.weights_proj",
755
  "model.layers.74.self_attn.indexer.wk",
756
  "model.layers.74.self_attn.indexer.wq_b",
 
760
  "model.layers.74.self_attn.q_a_proj",
761
  "model.layers.74.self_attn.q_b_proj",
762
  "model.layers.75.mlp.gate",
 
 
 
763
  "model.layers.75.self_attn.indexer.weights_proj",
764
  "model.layers.75.self_attn.indexer.wk",
765
  "model.layers.75.self_attn.indexer.wq_b",
 
769
  "model.layers.75.self_attn.q_a_proj",
770
  "model.layers.75.self_attn.q_b_proj",
771
  "model.layers.76.mlp.gate",
 
 
 
772
  "model.layers.76.self_attn.indexer.weights_proj",
773
  "model.layers.76.self_attn.indexer.wk",
774
  "model.layers.76.self_attn.indexer.wq_b",
 
778
  "model.layers.76.self_attn.q_a_proj",
779
  "model.layers.76.self_attn.q_b_proj",
780
  "model.layers.77.mlp.gate",
 
 
 
781
  "model.layers.77.self_attn.indexer.weights_proj",
782
  "model.layers.77.self_attn.indexer.wk",
783
  "model.layers.77.self_attn.indexer.wq_b",
 
787
  "model.layers.77.self_attn.q_a_proj",
788
  "model.layers.77.self_attn.q_b_proj",
789
  "model.layers.78.mlp.gate",
 
 
 
790
  "model.layers.78.self_attn.indexer.weights_proj",
791
  "model.layers.78.self_attn.indexer.wk",
792
  "model.layers.78.self_attn.indexer.wq_b",
 
796
  "model.layers.78.self_attn.q_a_proj",
797
  "model.layers.78.self_attn.q_b_proj",
798
  "model.layers.8.mlp.gate",
 
 
 
799
  "model.layers.8.self_attn.indexer.weights_proj",
800
  "model.layers.8.self_attn.indexer.wk",
801
  "model.layers.8.self_attn.indexer.wq_b",
 
805
  "model.layers.8.self_attn.q_a_proj",
806
  "model.layers.8.self_attn.q_b_proj",
807
  "model.layers.9.mlp.gate",
 
 
 
808
  "model.layers.9.self_attn.indexer.weights_proj",
809
  "model.layers.9.self_attn.indexer.wk",
810
  "model.layers.9.self_attn.indexer.wq_b",
 
822
  "kv_cache_quant_config": {},
823
  "kv_cache_post_rope": false,
824
  "quant_mode": "eager_mode",
825
+ "version": "0.12+4f9d2296257",
826
  "export": {
827
  "kv_cache_group": [],
828
  "min_kv_scale": 0.0,
model-00005-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc8c329a5a9c83a678da7f9e02fed250f1372a9baefffaad01120adfda551b4b
3
- size 1737703968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26940b6f9473db94179144a15fe63cbfec50ecf5810476713e0cb8080dffb94
3
+ size 1682260856
model-00009-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a168b828b2aa13eff829b9fc91b62d84ba55c2262e941b3e0e6c09bb10f8be0c
3
- size 1737704104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d56e5119dca52c43d9ce31a1f1064923ef2e8202739acb5b1ff5b90a9e01f0e
3
+ size 1682260992
model-00012-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e84e133b5f5178b1d545f3987354e3ccdef25893dfc3db374b9bed6c0f07564
3
- size 1463997064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5002e1ff5310504dd2531d6e613c57277dda63c4c02caf04afb3113136defdcb
3
+ size 1427034960
model-00013-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee04729368cd144dbee1d0293fc019262b68719fb14135ef0bfcf8699a9b96d8
3
- size 1697595184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ade48d852d60663273770aad7bc351a5b7fa850b1af6ba73fb4e273e2ac5efe
3
+ size 1679114128
model-00016-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85929ef0c5fddc04dca3c4c970661beefcfc806c639175cd0879db37990a39d1
3
- size 1737703944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85f66294ada19805d5c6ec0727f15a4f3aa767ba0d635a749fefa6232dc136c
3
+ size 1682260832
model-00020-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95ac78d6c4f82520cdf579ecf6fa3de02bb6deae854f523f429927c7e175fea1
3
- size 1737704080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6ade5c8835ddd304840e568a17b344c7673e834a345598ff8489207bac6d1a
3
+ size 1682260968
model-00024-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dcdf5e8827c7ff29073f347d6469bf81cc6c87d34a15f922cfc272ad0204768
3
- size 1737704208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf870134763fedf5defb298ec6d3342b7366c6ae3ceb4c7779f1044659d101d3
3
+ size 1682261096
model-00027-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a2933fa278d54b927b17712a0288881d4cee84c8aeae6f51f6b0fe5b6b69f37
3
- size 1737703928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4127b76fdd507616a5f93b9d16131f162804098c66aba63bfc05ed005138518
3
+ size 1682260816
model-00031-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f29561964396e1ca591a1364e3c8e5b367d4811535f9d1e8f34ef70509c8255e
3
- size 1737704056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbf7e880aff7f656ac96a927ee8467554096615ab15fd29def1d9a524b8f17c
3
+ size 1682260944
model-00035-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc5dfbc9eb03d2b03f01ad940181dcd2a8a20052608ce018dab286e2801d3ae
3
- size 1737704184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c58f27a3677f3457973dcc1fc7d422dec1d1c4c612d150859d68e63435038a8c
3
+ size 1682261072
model-00038-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1056460f7a62d1d87fe59bd78ca059636fb386daf3b2f3c2eef39e431f2f45f0
3
- size 2260038664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907e3c6c2725a6abe9e83e5f0de7f09c56e3241fbcebcc141262333d7215b114
3
+ size 2204595600
model-00042-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fadee28237027195440ca9060d61b775643cdf8069994423c174d029322fb4f
3
- size 1737703968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61be60b23afe1c1a755114260b96f54500b0f094eefdea454da6c67c5f8fe71
3
+ size 1682260848
model-00046-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a65c4c62366b02c04f356f71921833f4f39280475b618042d414f74786ed8e6e
3
- size 1737704104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:080ff780e2983db8bcc6a8865da8b8374df552dafd26233d67ca30206356437a
3
+ size 1682260992
model-00049-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe94fbdeecb5ceee200704747e2a9b0f7a7178afc7eca27523d38273d47577e
3
- size 1445516008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0989339b49451487e78f19bbcbee17e995a4ffa04520d229e1218b400e81a9c
3
+ size 1427034952
model-00050-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c25eedbdd6843c3ffc52f0ca9e5ede35353f5bab25f204b99f78dd499a2cc5f8
3
- size 1716076232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867476ef4b292c18711b45871340d23ec9b9e285cc234ad1818ccd845392dc64
3
+ size 1679114136
model-00053-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de232a272294b0bf711d7ba4899350fd345dd65860276db77bfdf8b4d5965627
3
- size 1737703944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca11cf1231578e0485be38ac5d7fc0544b646af627850d39403084bfd2d9921
3
+ size 1682260824
model-00057-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6abac992f5365d82587df1ba1125d5ee1d6f58696ea3bc6d4fb17a7080fe33a6
3
- size 1737704080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff9428e85a99b88164af6403d8fab1e6c4a699651eb5ebd91c455b88e990a83
3
+ size 1682260968
model-00061-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf64238281755cef465ce9fe48a7ef9146bd8b8fc251fd13cffec87f7a7add7b
3
- size 1737704216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa5126807b7bc25a590f9ee0c02516137657ce006a104d8f7b54dcbd1b17375
3
+ size 1682261096
model-00064-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e040342a3c2ceaeca0eafdeb5fdf90576dab6dfd4526576b3383ef9f9e8a946
3
- size 1737703920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867a8b3e8625da9dd1b1aeac6691ba540322de5a6e3b5a0936f40e5c4df7fc0f
3
+ size 1682260808
model-00068-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75ab3971c913dad9b661d9e34973ce3869c1954c32116dc8cb26672ae9119d1b
3
- size 1737704056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce867f0559fe9dd775277289ff3dc54b90b06dfb19f91a1f82462aba81a8e7ec
3
+ size 1682260944
model-00072-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38e2e4ec17c04e96054acf45df173eda96602bb3b4eaca6345a5b8d744a62845
3
- size 1737704192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adea37d14131466cde43ec290f49b91ddd55d6d0d5e884ce7aed82020aebef57
3
+ size 1682261080
model-00075-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9a4e373b930792e2c8ed87f32226e303259420ad47cf142215668c6dd313a73
3
- size 1737703840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36296ded6050f0883702ebf714ce3e071da038ebefc73ce4b1f9777a54fc0843
3
+ size 1682260728
model-00079-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e826f51fc7c5225d4f611d624051418262c4171775140f03130326e271a5d67b
3
- size 1737703808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a9a85d5771f0d9531f6bc91a3e8e6d75d3c1d30c6d6b847ba082207d3a9cde
3
+ size 1682260688
model-00083-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b74cbd487dc3f7c14df682cb9934e00f1c7b219a17e9260328554f8cbd4a856
3
- size 1737704168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d7827f3b63df872dc668d129da2216854a1f130e01018f1fc0d11033d5777e
3
+ size 1682261048
model-00086-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7dea58401a8f715a6226a234024900544a1696158fd86a7ece4602b7b02b860
3
- size 1737703872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb4af0115f0ffd8ef01b5328b0afde0dcc53c6b63433608596590954467dc08
3
+ size 1682260752
model-00090-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4754b87dca269f473a43c78959d2394a76648740cfa6692a6c95711d07853e32
3
- size 1737704008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c0ca34a40a1af1d8dafdcf9306932b351b78b369546f32fd7d2de104363836
3
+ size 1682260896
model-00094-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce2caa8ada76cedd00a94ad2b5a4588c1368a372bc99add01b5748fd3dae4011
3
- size 1737704144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e30e31ffe517e43138aee025a41b99f4d51088ea7051bdd0099758d0bbd2ea
3
+ size 1682261024
model-00097-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13cf90375e2fc4b3974fda949c1a557e95a7ac5505aad9676cc07b781fa86962
3
- size 1737703864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec1eec1e77f187177d357f1356aa85303b247cadb1eb6dab9f8f79c40bd310e
3
+ size 1682260752
model-00101-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c345cc5fe338a7af7c7305bc86c48fe29c69e485174c20ce4dc65003c6b66544
3
- size 1737703992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b37e728dae61af0239a92e43d7264f2feace92b0db887094099823847fef69f
3
+ size 1682260872
model-00105-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bd59695f0fda2fbf3b4901d72829949015d69f1f150854800ae859e21dfce8a
3
- size 1737704120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c113c165c316ae88b0a7b3463536d057701b3f7e14e905783a5db86ce083de
3
+ size 1682261008
model-00108-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea3cd8402fae5411f224ade006916db6e3ac40cd4ea17482c3491a3a4858dc5d
3
- size 1504249752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ccbb66c29c24cb03a251c83510c9a649d2b9d22e7e89b75ba6beff61b5ef64
3
+ size 1448806616
model-00112-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:791955e071af8f61889e06e8953e7debc7eb97c5e0add748230a7e84b4191209
3
- size 1737703952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9469e434fa0342336d5b2e329f9664a066b672171ee0d2c6a6f437c26837f507
3
+ size 1682260840
model-00116-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afaf09ea013da6ba7612a759fc0827cfe05c11b6356f841c3a2e9f246d43b518
3
- size 1737703840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3193be43befe2da90df59608f55674c2162a6ecfb51bca07b5600203419b9051
3
+ size 1682260728
model-00120-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:330de3e9ba98ab236b81e42ee5df965c3db720773eaced3b23ea0afab6db4fac
3
- size 1737704184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f276721893ac4b2c3f130fbee66959c729f568571e25ef37a772143975006d
3
+ size 1682261072
model-00123-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eae656b6818f3bd54985b642a20cab861a52dceedcced0a9c4d1a96fbd21597
3
- size 1737703928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ada0068dbc11c6ab61ddc3461e4952a1e584f25d483ee8c37a8d933576010d2
3
+ size 1682260816
model-00127-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5174ea701966fb6fe6c6c988eef665fe9ad39c77540567908a12cb567d463da
3
- size 1737704064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e163d99e1677296b904fd17df746f154b2fb5e41a7a60715a93667114b31d449
3
+ size 1682260952
model-00131-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6da7f4bd19d8bf3b1ee16e590b2a3afbfda078173a0f798a0ce1eed23697dcea
3
- size 1737704192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d5d6210a3d237ebd726fc2a9a4d486eeaa2b8dd8c0f3b3ce7c6288cb13bea1
3
+ size 1682261080
model-00134-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c432d0919dae37ed4376b16d604dca6f950a9d01d9fb78e60819fcd00a2859
3
- size 1737703904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa0218e000b727b02f49ed91d351f35c5249939c190f765ebe5411c25a2f0e7
3
+ size 1682260792
model-00138-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67fe316ec47c64166798212128cb54f0bfd0f8d85d6660d352e3df5d9bc5221a
3
- size 1737704040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2cbed6245333571e089fc7d034faf0f84492b1741b9080a73340edc8ac6005
3
+ size 1682260928
model-00142-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ccfc435a5a4f437bc37e0e07b2215e0f0035284d2edbf6e664177a35f816387
3
- size 1737704176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd930b09b631bd569888a8c110fbf7051d6e63e4990c1e929a94c950b13d5adf
3
+ size 1682261056
model-00145-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca3b46c5d8e94efef2368078c43053ad92f70e50e7717563152550ead89a185b
3
- size 1737703880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:815f57c811220dc80408eb9f3631a7e39a950e4d4a071b532aa027f27869aeca
3
+ size 1682260768
model-00149-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a8f0ddc92572885888859cd931bfd9973c0962a9410b3021d539ea48dda0b77
3
- size 1737704016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e48d30fccc583239d7889f4ff3954980c3e21be4152a849c7c7ec1d9d03a861
3
+ size 1682260904
model-00153-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3043bb35774665f9c6a24fdefb26c69c78afe90782ee02faec89f8643e2d03d7
3
- size 1737704144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3edfbbaffd8736739bbcc329a64120ac10b1870022ad636cdc18dc317e2c5fa
3
+ size 1682261032
model-00156-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7fbab3a720a1316719dcb806f4df3fad08fc68b7bb4b6e531c79ff7ce593a53
3
- size 1737703848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f73ca5e501b241930ade2ae457ea7aacfa16438c74b2370d0ff323114ec4e0
3
+ size 1682260728
model-00160-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eacc6dd6de00bba38ee334a0dc333249e3607c25011d53db57396a5b51701ef8
3
- size 1737703728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a295a181f0d879019b6c10537ea89b5a02e4b841589d2586ea74f584cdb901
3
+ size 1682260616
model-00164-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:124b46932db66c81c40baa74b13006bd2fb0aa0982931faf53e738b72754469a
3
- size 1737704120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b81239538f757d0580c40a9bf3ce003917af873271d98ce28191ac5ae29613
3
+ size 1682261008
model-00167-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f77afcbea7ec5ac149035df1239296505f85b593f009edc310ec470c7af59655
3
- size 1477510064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c3dca1b5a95cb791c5d485f2f94973638de8060cf5f4d4dc6afe4ef8e8ae6e4
3
+ size 1422066928
model-00171-of-00282.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:606d62a76680d637be5cca50df5d9db924ad682125bab09bbb7dc28c8fcb8ca6
3
- size 1737703952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51214bfed6a6953169cc552f78c5aeb71c852196c1eecca2b5b298e1a5a7db1b
3
+ size 1682260840