NicoNico committed on
Commit
2f3b589
1 Parent(s): d37a5d6

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. model.safetensors +2 -2
  3. quant_strategy.json +204 -204
config.json CHANGED
@@ -75,7 +75,7 @@
75
  "top_p": 1.0,
76
  "torch_dtype": "float16",
77
  "torchscript": false,
78
- "transformers_version": "4.39.0.dev0",
79
  "typical_p": 1.0,
80
  "use_bfloat16": false,
81
  "use_cache": true,
 
75
  "top_p": 1.0,
76
  "torch_dtype": "float16",
77
  "torchscript": false,
78
+ "transformers_version": "4.39.3",
79
  "typical_p": 1.0,
80
  "use_bfloat16": false,
81
  "use_cache": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fb7babed0b95829312c0b5b23ad139272c81b45823a1c77af17e6911d72c2dc
3
- size 3437638472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa641a52062979b2cf479742ed3e87215faf2b261840a917196a2ffa67d2790
3
+ size 3437638695
quant_strategy.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.9328658580780029,
5
- "total_bits": 648037632,
6
  "q_proj": {
7
  "group_size": {
8
- "4": 128
9
  },
10
  "bits": [
11
- 4
12
  ],
13
  "bits_prop": [
14
  1
@@ -17,10 +17,10 @@
17
  },
18
  "k_proj": {
19
  "group_size": {
20
- "4": 128
21
  },
22
  "bits": [
23
- 4
24
  ],
25
  "bits_prop": [
26
  1
@@ -41,10 +41,10 @@
41
  },
42
  "o_proj": {
43
  "group_size": {
44
- "4": 128
45
  },
46
  "bits": [
47
- 4
48
  ],
49
  "bits_prop": [
50
  1
@@ -89,14 +89,14 @@
89
  }
90
  },
91
  "model.layers.1": {
92
- "accuracy": 0.9108891487121582,
93
- "total_bits": 648037632,
94
  "q_proj": {
95
  "group_size": {
96
- "4": 128
97
  },
98
  "bits": [
99
- 4
100
  ],
101
  "bits_prop": [
102
  1
@@ -105,10 +105,10 @@
105
  },
106
  "k_proj": {
107
  "group_size": {
108
- "4": 128
109
  },
110
  "bits": [
111
- 4
112
  ],
113
  "bits_prop": [
114
  1
@@ -129,10 +129,10 @@
129
  },
130
  "o_proj": {
131
  "group_size": {
132
- "4": 128
133
  },
134
  "bits": [
135
- 4
136
  ],
137
  "bits_prop": [
138
  1
@@ -177,7 +177,7 @@
177
  }
178
  },
179
  "model.layers.2": {
180
- "accuracy": 0.9520481824874878,
181
  "total_bits": 458124288,
182
  "q_proj": {
183
  "group_size": {
@@ -265,7 +265,7 @@
265
  }
266
  },
267
  "model.layers.3": {
268
- "accuracy": 0.9584293961524963,
269
  "total_bits": 466380288,
270
  "q_proj": {
271
  "group_size": {
@@ -353,8 +353,8 @@
353
  }
354
  },
355
  "model.layers.4": {
356
- "accuracy": 0.9519124627113342,
357
- "total_bits": 466380288,
358
  "q_proj": {
359
  "group_size": {
360
  "2": 64
@@ -369,10 +369,10 @@
369
  },
370
  "k_proj": {
371
  "group_size": {
372
- "4": 128
373
  },
374
  "bits": [
375
- 4
376
  ],
377
  "bits_prop": [
378
  1
@@ -441,7 +441,7 @@
441
  }
442
  },
443
  "model.layers.5": {
444
- "accuracy": 0.9403245449066162,
445
  "total_bits": 458124288,
446
  "q_proj": {
447
  "group_size": {
@@ -529,8 +529,8 @@
529
  }
530
  },
531
  "model.layers.6": {
532
- "accuracy": 0.9401131868362427,
533
- "total_bits": 491152896,
534
  "q_proj": {
535
  "group_size": {
536
  "2": 64
@@ -569,10 +569,10 @@
569
  },
570
  "o_proj": {
571
  "group_size": {
572
- "4": 128
573
  },
574
  "bits": [
575
- 4
576
  ],
577
  "bits_prop": [
578
  1
@@ -617,8 +617,8 @@
617
  }
618
  },
619
  "model.layers.7": {
620
- "accuracy": 0.9464203119277954,
621
- "total_bits": 606753024,
622
  "q_proj": {
623
  "group_size": {
624
  "2": 64
@@ -633,10 +633,10 @@
633
  },
634
  "k_proj": {
635
  "group_size": {
636
- "2": 64
637
  },
638
  "bits": [
639
- 2
640
  ],
641
  "bits_prop": [
642
  1
@@ -657,10 +657,10 @@
657
  },
658
  "o_proj": {
659
  "group_size": {
660
- "4": 128
661
  },
662
  "bits": [
663
- 4
664
  ],
665
  "bits_prop": [
666
  1
@@ -693,10 +693,10 @@
693
  },
694
  "down_proj": {
695
  "group_size": {
696
- "4": 128
697
  },
698
  "bits": [
699
- 4
700
  ],
701
  "bits_prop": [
702
  1
@@ -705,14 +705,14 @@
705
  }
706
  },
707
  "model.layers.8": {
708
- "accuracy": 0.9558004140853882,
709
- "total_bits": 639781632,
710
  "q_proj": {
711
  "group_size": {
712
- "4": 128
713
  },
714
  "bits": [
715
- 4
716
  ],
717
  "bits_prop": [
718
  1
@@ -721,10 +721,10 @@
721
  },
722
  "k_proj": {
723
  "group_size": {
724
- "2": 64
725
  },
726
  "bits": [
727
- 2
728
  ],
729
  "bits_prop": [
730
  1
@@ -745,10 +745,10 @@
745
  },
746
  "o_proj": {
747
  "group_size": {
748
- "4": 128
749
  },
750
  "bits": [
751
- 4
752
  ],
753
  "bits_prop": [
754
  1
@@ -781,10 +781,10 @@
781
  },
782
  "down_proj": {
783
  "group_size": {
784
- "4": 128
785
  },
786
  "bits": [
787
- 4
788
  ],
789
  "bits_prop": [
790
  1
@@ -793,14 +793,14 @@
793
  }
794
  },
795
  "model.layers.9": {
796
- "accuracy": 0.9580546617507935,
797
- "total_bits": 648037632,
798
  "q_proj": {
799
  "group_size": {
800
- "4": 128
801
  },
802
  "bits": [
803
- 4
804
  ],
805
  "bits_prop": [
806
  1
@@ -809,10 +809,10 @@
809
  },
810
  "k_proj": {
811
  "group_size": {
812
- "4": 128
813
  },
814
  "bits": [
815
- 4
816
  ],
817
  "bits_prop": [
818
  1
@@ -833,10 +833,10 @@
833
  },
834
  "o_proj": {
835
  "group_size": {
836
- "4": 128
837
  },
838
  "bits": [
839
- 4
840
  ],
841
  "bits_prop": [
842
  1
@@ -869,10 +869,10 @@
869
  },
870
  "down_proj": {
871
  "group_size": {
872
- "4": 128
873
  },
874
  "bits": [
875
- 4
876
  ],
877
  "bits_prop": [
878
  1
@@ -881,14 +881,14 @@
881
  }
882
  },
883
  "model.layers.10": {
884
- "accuracy": 0.9503617286682129,
885
- "total_bits": 639781632,
886
  "q_proj": {
887
  "group_size": {
888
- "4": 128
889
  },
890
  "bits": [
891
- 4
892
  ],
893
  "bits_prop": [
894
  1
@@ -897,10 +897,10 @@
897
  },
898
  "k_proj": {
899
  "group_size": {
900
- "2": 64
901
  },
902
  "bits": [
903
- 2
904
  ],
905
  "bits_prop": [
906
  1
@@ -921,10 +921,10 @@
921
  },
922
  "o_proj": {
923
  "group_size": {
924
- "4": 128
925
  },
926
  "bits": [
927
- 4
928
  ],
929
  "bits_prop": [
930
  1
@@ -957,10 +957,10 @@
957
  },
958
  "down_proj": {
959
  "group_size": {
960
- "4": 128
961
  },
962
  "bits": [
963
- 4
964
  ],
965
  "bits_prop": [
966
  1
@@ -969,8 +969,8 @@
969
  }
970
  },
971
  "model.layers.11": {
972
- "accuracy": 0.9408208131790161,
973
- "total_bits": 606753024,
974
  "q_proj": {
975
  "group_size": {
976
  "2": 64
@@ -985,10 +985,10 @@
985
  },
986
  "k_proj": {
987
  "group_size": {
988
- "2": 64
989
  },
990
  "bits": [
991
- 2
992
  ],
993
  "bits_prop": [
994
  1
@@ -1009,10 +1009,10 @@
1009
  },
1010
  "o_proj": {
1011
  "group_size": {
1012
- "4": 128
1013
  },
1014
  "bits": [
1015
- 4
1016
  ],
1017
  "bits_prop": [
1018
  1
@@ -1045,10 +1045,10 @@
1045
  },
1046
  "down_proj": {
1047
  "group_size": {
1048
- "4": 128
1049
  },
1050
  "bits": [
1051
- 4
1052
  ],
1053
  "bits_prop": [
1054
  1
@@ -1057,14 +1057,14 @@
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
- "accuracy": 0.9455924034118652,
1061
- "total_bits": 639781632,
1062
  "q_proj": {
1063
  "group_size": {
1064
- "4": 128
1065
  },
1066
  "bits": [
1067
- 4
1068
  ],
1069
  "bits_prop": [
1070
  1
@@ -1073,10 +1073,10 @@
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
- "2": 64
1077
  },
1078
  "bits": [
1079
- 2
1080
  ],
1081
  "bits_prop": [
1082
  1
@@ -1097,10 +1097,10 @@
1097
  },
1098
  "o_proj": {
1099
  "group_size": {
1100
- "4": 128
1101
  },
1102
  "bits": [
1103
- 4
1104
  ],
1105
  "bits_prop": [
1106
  1
@@ -1133,10 +1133,10 @@
1133
  },
1134
  "down_proj": {
1135
  "group_size": {
1136
- "4": 128
1137
  },
1138
  "bits": [
1139
- 4
1140
  ],
1141
  "bits_prop": [
1142
  1
@@ -1145,14 +1145,14 @@
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
- "accuracy": 0.9480124711990356,
1149
- "total_bits": 648037632,
1150
  "q_proj": {
1151
  "group_size": {
1152
- "4": 128
1153
  },
1154
  "bits": [
1155
- 4
1156
  ],
1157
  "bits_prop": [
1158
  1
@@ -1161,10 +1161,10 @@
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
- "4": 128
1165
  },
1166
  "bits": [
1167
- 4
1168
  ],
1169
  "bits_prop": [
1170
  1
@@ -1185,10 +1185,10 @@
1185
  },
1186
  "o_proj": {
1187
  "group_size": {
1188
- "4": 128
1189
  },
1190
  "bits": [
1191
- 4
1192
  ],
1193
  "bits_prop": [
1194
  1
@@ -1221,10 +1221,10 @@
1221
  },
1222
  "down_proj": {
1223
  "group_size": {
1224
- "4": 128
1225
  },
1226
  "bits": [
1227
- 4
1228
  ],
1229
  "bits_prop": [
1230
  1
@@ -1233,8 +1233,8 @@
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
- "accuracy": 0.9390950202941895,
1237
- "total_bits": 615009024,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
@@ -1249,10 +1249,10 @@
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
- "4": 128
1253
  },
1254
  "bits": [
1255
- 4
1256
  ],
1257
  "bits_prop": [
1258
  1
@@ -1273,10 +1273,10 @@
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
- "4": 128
1277
  },
1278
  "bits": [
1279
- 4
1280
  ],
1281
  "bits_prop": [
1282
  1
@@ -1321,14 +1321,14 @@
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
- "accuracy": 0.9407241344451904,
1325
- "total_bits": 648037632,
1326
  "q_proj": {
1327
  "group_size": {
1328
- "4": 128
1329
  },
1330
  "bits": [
1331
- 4
1332
  ],
1333
  "bits_prop": [
1334
  1
@@ -1337,10 +1337,10 @@
1337
  },
1338
  "k_proj": {
1339
  "group_size": {
1340
- "4": 128
1341
  },
1342
  "bits": [
1343
- 4
1344
  ],
1345
  "bits_prop": [
1346
  1
@@ -1361,10 +1361,10 @@
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
- "4": 128
1365
  },
1366
  "bits": [
1367
- 4
1368
  ],
1369
  "bits_prop": [
1370
  1
@@ -1409,14 +1409,14 @@
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
- "accuracy": 0.9330849647521973,
1413
- "total_bits": 648037632,
1414
  "q_proj": {
1415
  "group_size": {
1416
- "4": 128
1417
  },
1418
  "bits": [
1419
- 4
1420
  ],
1421
  "bits_prop": [
1422
  1
@@ -1497,14 +1497,14 @@
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
- "accuracy": 0.9421899318695068,
1501
- "total_bits": 722356992,
1502
  "q_proj": {
1503
  "group_size": {
1504
- "2": 64
1505
  },
1506
  "bits": [
1507
- 2
1508
  ],
1509
  "bits_prop": [
1510
  1
@@ -1513,10 +1513,10 @@
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
- "2": 64
1517
  },
1518
  "bits": [
1519
- 2
1520
  ],
1521
  "bits_prop": [
1522
  1
@@ -1549,10 +1549,10 @@
1549
  },
1550
  "up_proj": {
1551
  "group_size": {
1552
- "4": 128
1553
  },
1554
  "bits": [
1555
- 4
1556
  ],
1557
  "bits_prop": [
1558
  1
@@ -1585,7 +1585,7 @@
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
- "accuracy": 0.9388406276702881,
1589
  "total_bits": 722356992,
1590
  "q_proj": {
1591
  "group_size": {
@@ -1673,14 +1673,14 @@
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
- "accuracy": 0.9384818077087402,
1677
- "total_bits": 722356992,
1678
  "q_proj": {
1679
  "group_size": {
1680
- "2": 64
1681
  },
1682
  "bits": [
1683
- 2
1684
  ],
1685
  "bits_prop": [
1686
  1
@@ -1761,8 +1761,8 @@
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
- "accuracy": 0.9433858394622803,
1765
- "total_bits": 722356992,
1766
  "q_proj": {
1767
  "group_size": {
1768
  "2": 64
@@ -1777,10 +1777,10 @@
1777
  },
1778
  "k_proj": {
1779
  "group_size": {
1780
- "2": 64
1781
  },
1782
  "bits": [
1783
- 2
1784
  ],
1785
  "bits_prop": [
1786
  1
@@ -1825,10 +1825,10 @@
1825
  },
1826
  "gate_proj": {
1827
  "group_size": {
1828
- "2": 64
1829
  },
1830
  "bits": [
1831
- 2
1832
  ],
1833
  "bits_prop": [
1834
  1
@@ -1849,8 +1849,8 @@
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
- "accuracy": 0.9452800750732422,
1853
- "total_bits": 722356992,
1854
  "q_proj": {
1855
  "group_size": {
1856
  "2": 64
@@ -1865,10 +1865,10 @@
1865
  },
1866
  "k_proj": {
1867
  "group_size": {
1868
- "2": 64
1869
  },
1870
  "bits": [
1871
- 2
1872
  ],
1873
  "bits_prop": [
1874
  1
@@ -1913,10 +1913,10 @@
1913
  },
1914
  "gate_proj": {
1915
  "group_size": {
1916
- "2": 64
1917
  },
1918
  "bits": [
1919
- 2
1920
  ],
1921
  "bits_prop": [
1922
  1
@@ -1937,8 +1937,8 @@
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
- "accuracy": 0.9475953578948975,
1941
- "total_bits": 722356992,
1942
  "q_proj": {
1943
  "group_size": {
1944
  "2": 64
@@ -1953,10 +1953,10 @@
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
- "2": 64
1957
  },
1958
  "bits": [
1959
- 2
1960
  ],
1961
  "bits_prop": [
1962
  1
@@ -2001,10 +2001,10 @@
2001
  },
2002
  "gate_proj": {
2003
  "group_size": {
2004
- "2": 64
2005
  },
2006
  "bits": [
2007
- 2
2008
  ],
2009
  "bits_prop": [
2010
  1
@@ -2025,8 +2025,8 @@
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
- "accuracy": 0.9454758167266846,
2029
- "total_bits": 722356992,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "2": 64
@@ -2041,10 +2041,10 @@
2041
  },
2042
  "k_proj": {
2043
  "group_size": {
2044
- "2": 64
2045
  },
2046
  "bits": [
2047
- 2
2048
  ],
2049
  "bits_prop": [
2050
  1
@@ -2089,10 +2089,10 @@
2089
  },
2090
  "gate_proj": {
2091
  "group_size": {
2092
- "2": 64
2093
  },
2094
  "bits": [
2095
- 2
2096
  ],
2097
  "bits_prop": [
2098
  1
@@ -2113,8 +2113,8 @@
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
- "accuracy": 0.9459505081176758,
2117
- "total_bits": 722356992,
2118
  "q_proj": {
2119
  "group_size": {
2120
  "2": 64
@@ -2129,10 +2129,10 @@
2129
  },
2130
  "k_proj": {
2131
  "group_size": {
2132
- "2": 64
2133
  },
2134
  "bits": [
2135
- 2
2136
  ],
2137
  "bits_prop": [
2138
  1
@@ -2177,10 +2177,10 @@
2177
  },
2178
  "gate_proj": {
2179
  "group_size": {
2180
- "2": 64
2181
  },
2182
  "bits": [
2183
- 2
2184
  ],
2185
  "bits_prop": [
2186
  1
@@ -2201,8 +2201,8 @@
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
- "accuracy": 0.9440786838531494,
2205
- "total_bits": 722356992,
2206
  "q_proj": {
2207
  "group_size": {
2208
  "2": 64
@@ -2217,10 +2217,10 @@
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
- "2": 64
2221
  },
2222
  "bits": [
2223
- 2
2224
  ],
2225
  "bits_prop": [
2226
  1
@@ -2265,10 +2265,10 @@
2265
  },
2266
  "gate_proj": {
2267
  "group_size": {
2268
- "2": 64
2269
  },
2270
  "bits": [
2271
- 2
2272
  ],
2273
  "bits_prop": [
2274
  1
@@ -2289,14 +2289,14 @@
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
- "accuracy": 0.9433612823486328,
2293
- "total_bits": 722356992,
2294
  "q_proj": {
2295
  "group_size": {
2296
- "2": 64
2297
  },
2298
  "bits": [
2299
- 2
2300
  ],
2301
  "bits_prop": [
2302
  1
@@ -2305,10 +2305,10 @@
2305
  },
2306
  "k_proj": {
2307
  "group_size": {
2308
- "2": 64
2309
  },
2310
  "bits": [
2311
- 2
2312
  ],
2313
  "bits_prop": [
2314
  1
@@ -2353,10 +2353,10 @@
2353
  },
2354
  "gate_proj": {
2355
  "group_size": {
2356
- "2": 64
2357
  },
2358
  "bits": [
2359
- 2
2360
  ],
2361
  "bits_prop": [
2362
  1
@@ -2377,14 +2377,14 @@
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
- "accuracy": 0.9458332061767578,
2381
- "total_bits": 722356992,
2382
  "q_proj": {
2383
  "group_size": {
2384
- "2": 64
2385
  },
2386
  "bits": [
2387
- 2
2388
  ],
2389
  "bits_prop": [
2390
  1
@@ -2393,10 +2393,10 @@
2393
  },
2394
  "k_proj": {
2395
  "group_size": {
2396
- "2": 64
2397
  },
2398
  "bits": [
2399
- 2
2400
  ],
2401
  "bits_prop": [
2402
  1
@@ -2429,10 +2429,10 @@
2429
  },
2430
  "up_proj": {
2431
  "group_size": {
2432
- "2": 64
2433
  },
2434
  "bits": [
2435
- 2
2436
  ],
2437
  "bits_prop": [
2438
  1
@@ -2465,14 +2465,14 @@
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
- "accuracy": 0.9405984878540039,
2469
- "total_bits": 722356992,
2470
  "q_proj": {
2471
  "group_size": {
2472
- "2": 64
2473
  },
2474
  "bits": [
2475
- 2
2476
  ],
2477
  "bits_prop": [
2478
  1
@@ -2481,10 +2481,10 @@
2481
  },
2482
  "k_proj": {
2483
  "group_size": {
2484
- "2": 64
2485
  },
2486
  "bits": [
2487
- 2
2488
  ],
2489
  "bits_prop": [
2490
  1
@@ -2529,10 +2529,10 @@
2529
  },
2530
  "gate_proj": {
2531
  "group_size": {
2532
- "2": 64
2533
  },
2534
  "bits": [
2535
- 2
2536
  ],
2537
  "bits_prop": [
2538
  1
@@ -2553,14 +2553,14 @@
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
- "accuracy": 0.9377729892730713,
2557
- "total_bits": 722356992,
2558
  "q_proj": {
2559
  "group_size": {
2560
- "2": 64
2561
  },
2562
  "bits": [
2563
- 2
2564
  ],
2565
  "bits_prop": [
2566
  1
@@ -2569,10 +2569,10 @@
2569
  },
2570
  "k_proj": {
2571
  "group_size": {
2572
- "2": 64
2573
  },
2574
  "bits": [
2575
- 2
2576
  ],
2577
  "bits_prop": [
2578
  1
@@ -2605,10 +2605,10 @@
2605
  },
2606
  "up_proj": {
2607
  "group_size": {
2608
- "2": 64
2609
  },
2610
  "bits": [
2611
- 2
2612
  ],
2613
  "bits_prop": [
2614
  1
@@ -2641,14 +2641,14 @@
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
- "accuracy": 0.934866189956665,
2645
- "total_bits": 722356992,
2646
  "q_proj": {
2647
  "group_size": {
2648
- "2": 64
2649
  },
2650
  "bits": [
2651
- 2
2652
  ],
2653
  "bits_prop": [
2654
  1
@@ -2657,10 +2657,10 @@
2657
  },
2658
  "k_proj": {
2659
  "group_size": {
2660
- "2": 64
2661
  },
2662
  "bits": [
2663
- 2
2664
  ],
2665
  "bits_prop": [
2666
  1
@@ -2693,10 +2693,10 @@
2693
  },
2694
  "up_proj": {
2695
  "group_size": {
2696
- "2": 64
2697
  },
2698
  "bits": [
2699
- 2
2700
  ],
2701
  "bits_prop": [
2702
  1
@@ -2729,14 +2729,14 @@
2729
  }
2730
  },
2731
  "model.layers.31": {
2732
- "accuracy": 0.9650318622589111,
2733
- "total_bits": 846216960,
2734
  "q_proj": {
2735
  "group_size": {
2736
- "2": 64
2737
  },
2738
  "bits": [
2739
- 2
2740
  ],
2741
  "bits_prop": [
2742
  1
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.8820657730102539,
5
+ "total_bits": 573724416,
6
  "q_proj": {
7
  "group_size": {
8
+ "2": 64
9
  },
10
  "bits": [
11
+ 2
12
  ],
13
  "bits_prop": [
14
  1
 
17
  },
18
  "k_proj": {
19
  "group_size": {
20
+ "2": 64
21
  },
22
  "bits": [
23
+ 2
24
  ],
25
  "bits_prop": [
26
  1
 
41
  },
42
  "o_proj": {
43
  "group_size": {
44
+ "2": 64
45
  },
46
  "bits": [
47
+ 2
48
  ],
49
  "bits_prop": [
50
  1
 
89
  }
90
  },
91
  "model.layers.1": {
92
+ "accuracy": 0.8954000473022461,
93
+ "total_bits": 573724416,
94
  "q_proj": {
95
  "group_size": {
96
+ "2": 64
97
  },
98
  "bits": [
99
+ 2
100
  ],
101
  "bits_prop": [
102
  1
 
105
  },
106
  "k_proj": {
107
  "group_size": {
108
+ "2": 64
109
  },
110
  "bits": [
111
+ 2
112
  ],
113
  "bits_prop": [
114
  1
 
129
  },
130
  "o_proj": {
131
  "group_size": {
132
+ "2": 64
133
  },
134
  "bits": [
135
+ 2
136
  ],
137
  "bits_prop": [
138
  1
 
177
  }
178
  },
179
  "model.layers.2": {
180
+ "accuracy": 0.9489546418190002,
181
  "total_bits": 458124288,
182
  "q_proj": {
183
  "group_size": {
 
265
  }
266
  },
267
  "model.layers.3": {
268
+ "accuracy": 0.9551604390144348,
269
  "total_bits": 466380288,
270
  "q_proj": {
271
  "group_size": {
 
353
  }
354
  },
355
  "model.layers.4": {
356
+ "accuracy": 0.948026180267334,
357
+ "total_bits": 458124288,
358
  "q_proj": {
359
  "group_size": {
360
  "2": 64
 
369
  },
370
  "k_proj": {
371
  "group_size": {
372
+ "2": 64
373
  },
374
  "bits": [
375
+ 2
376
  ],
377
  "bits_prop": [
378
  1
 
441
  }
442
  },
443
  "model.layers.5": {
444
+ "accuracy": 0.938827395439148,
445
  "total_bits": 458124288,
446
  "q_proj": {
447
  "group_size": {
 
529
  }
530
  },
531
  "model.layers.6": {
532
+ "accuracy": 0.9337625503540039,
533
+ "total_bits": 458124288,
534
  "q_proj": {
535
  "group_size": {
536
  "2": 64
 
569
  },
570
  "o_proj": {
571
  "group_size": {
572
+ "2": 64
573
  },
574
  "bits": [
575
+ 2
576
  ],
577
  "bits_prop": [
578
  1
 
617
  }
618
  },
619
  "model.layers.7": {
620
+ "accuracy": 0.9263930320739746,
621
+ "total_bits": 466380288,
622
  "q_proj": {
623
  "group_size": {
624
  "2": 64
 
633
  },
634
  "k_proj": {
635
  "group_size": {
636
+ "4": 128
637
  },
638
  "bits": [
639
+ 4
640
  ],
641
  "bits_prop": [
642
  1
 
657
  },
658
  "o_proj": {
659
  "group_size": {
660
+ "2": 64
661
  },
662
  "bits": [
663
+ 2
664
  ],
665
  "bits_prop": [
666
  1
 
693
  },
694
  "down_proj": {
695
  "group_size": {
696
+ "2": 64
697
  },
698
  "bits": [
699
+ 2
700
  ],
701
  "bits_prop": [
702
  1
 
705
  }
706
  },
707
  "model.layers.8": {
708
+ "accuracy": 0.9275798797607422,
709
+ "total_bits": 466380288,
710
  "q_proj": {
711
  "group_size": {
712
+ "2": 64
713
  },
714
  "bits": [
715
+ 2
716
  ],
717
  "bits_prop": [
718
  1
 
721
  },
722
  "k_proj": {
723
  "group_size": {
724
+ "4": 128
725
  },
726
  "bits": [
727
+ 4
728
  ],
729
  "bits_prop": [
730
  1
 
745
  },
746
  "o_proj": {
747
  "group_size": {
748
+ "2": 64
749
  },
750
  "bits": [
751
+ 2
752
  ],
753
  "bits_prop": [
754
  1
 
781
  },
782
  "down_proj": {
783
  "group_size": {
784
+ "2": 64
785
  },
786
  "bits": [
787
+ 2
788
  ],
789
  "bits_prop": [
790
  1
 
793
  }
794
  },
795
  "model.layers.9": {
796
+ "accuracy": 0.9196293354034424,
797
+ "total_bits": 458124288,
798
  "q_proj": {
799
  "group_size": {
800
+ "2": 64
801
  },
802
  "bits": [
803
+ 2
804
  ],
805
  "bits_prop": [
806
  1
 
809
  },
810
  "k_proj": {
811
  "group_size": {
812
+ "2": 64
813
  },
814
  "bits": [
815
+ 2
816
  ],
817
  "bits_prop": [
818
  1
 
833
  },
834
  "o_proj": {
835
  "group_size": {
836
+ "2": 64
837
  },
838
  "bits": [
839
+ 2
840
  ],
841
  "bits_prop": [
842
  1
 
869
  },
870
  "down_proj": {
871
  "group_size": {
872
+ "2": 64
873
  },
874
  "bits": [
875
+ 2
876
  ],
877
  "bits_prop": [
878
  1
 
881
  }
882
  },
883
  "model.layers.10": {
884
+ "accuracy": 0.9175989627838135,
885
+ "total_bits": 466380288,
886
  "q_proj": {
887
  "group_size": {
888
+ "2": 64
889
  },
890
  "bits": [
891
+ 2
892
  ],
893
  "bits_prop": [
894
  1
 
897
  },
898
  "k_proj": {
899
  "group_size": {
900
+ "4": 128
901
  },
902
  "bits": [
903
+ 4
904
  ],
905
  "bits_prop": [
906
  1
 
921
  },
922
  "o_proj": {
923
  "group_size": {
924
+ "2": 64
925
  },
926
  "bits": [
927
+ 2
928
  ],
929
  "bits_prop": [
930
  1
 
957
  },
958
  "down_proj": {
959
  "group_size": {
960
+ "2": 64
961
  },
962
  "bits": [
963
+ 2
964
  ],
965
  "bits_prop": [
966
  1
 
969
  }
970
  },
971
  "model.layers.11": {
972
+ "accuracy": 0.9152019023895264,
973
+ "total_bits": 466380288,
974
  "q_proj": {
975
  "group_size": {
976
  "2": 64
 
985
  },
986
  "k_proj": {
987
  "group_size": {
988
+ "4": 128
989
  },
990
  "bits": [
991
+ 4
992
  ],
993
  "bits_prop": [
994
  1
 
1009
  },
1010
  "o_proj": {
1011
  "group_size": {
1012
+ "2": 64
1013
  },
1014
  "bits": [
1015
+ 2
1016
  ],
1017
  "bits_prop": [
1018
  1
 
1045
  },
1046
  "down_proj": {
1047
  "group_size": {
1048
+ "2": 64
1049
  },
1050
  "bits": [
1051
+ 2
1052
  ],
1053
  "bits_prop": [
1054
  1
 
1057
  }
1058
  },
1059
  "model.layers.12": {
1060
+ "accuracy": 0.9095911979675293,
1061
+ "total_bits": 466380288,
1062
  "q_proj": {
1063
  "group_size": {
1064
+ "2": 64
1065
  },
1066
  "bits": [
1067
+ 2
1068
  ],
1069
  "bits_prop": [
1070
  1
 
1073
  },
1074
  "k_proj": {
1075
  "group_size": {
1076
+ "4": 128
1077
  },
1078
  "bits": [
1079
+ 4
1080
  ],
1081
  "bits_prop": [
1082
  1
 
1097
  },
1098
  "o_proj": {
1099
  "group_size": {
1100
+ "2": 64
1101
  },
1102
  "bits": [
1103
+ 2
1104
  ],
1105
  "bits_prop": [
1106
  1
 
1133
  },
1134
  "down_proj": {
1135
  "group_size": {
1136
+ "2": 64
1137
  },
1138
  "bits": [
1139
+ 2
1140
  ],
1141
  "bits_prop": [
1142
  1
 
1145
  }
1146
  },
1147
  "model.layers.13": {
1148
+ "accuracy": 0.9048597812652588,
1149
+ "total_bits": 458124288,
1150
  "q_proj": {
1151
  "group_size": {
1152
+ "2": 64
1153
  },
1154
  "bits": [
1155
+ 2
1156
  ],
1157
  "bits_prop": [
1158
  1
 
1161
  },
1162
  "k_proj": {
1163
  "group_size": {
1164
+ "2": 64
1165
  },
1166
  "bits": [
1167
+ 2
1168
  ],
1169
  "bits_prop": [
1170
  1
 
1185
  },
1186
  "o_proj": {
1187
  "group_size": {
1188
+ "2": 64
1189
  },
1190
  "bits": [
1191
+ 2
1192
  ],
1193
  "bits_prop": [
1194
  1
 
1221
  },
1222
  "down_proj": {
1223
  "group_size": {
1224
+ "2": 64
1225
  },
1226
  "bits": [
1227
+ 2
1228
  ],
1229
  "bits_prop": [
1230
  1
 
1233
  }
1234
  },
1235
  "model.layers.14": {
1236
+ "accuracy": 0.9164364337921143,
1237
+ "total_bits": 573724416,
1238
  "q_proj": {
1239
  "group_size": {
1240
  "2": 64
 
1249
  },
1250
  "k_proj": {
1251
  "group_size": {
1252
+ "2": 64
1253
  },
1254
  "bits": [
1255
+ 2
1256
  ],
1257
  "bits_prop": [
1258
  1
 
1273
  },
1274
  "o_proj": {
1275
  "group_size": {
1276
+ "2": 64
1277
  },
1278
  "bits": [
1279
+ 2
1280
  ],
1281
  "bits_prop": [
1282
  1
 
1321
  }
1322
  },
1323
  "model.layers.15": {
1324
+ "accuracy": 0.9038996696472168,
1325
+ "total_bits": 573724416,
1326
  "q_proj": {
1327
  "group_size": {
1328
+ "2": 64
1329
  },
1330
  "bits": [
1331
+ 2
1332
  ],
1333
  "bits_prop": [
1334
  1
 
1337
  },
1338
  "k_proj": {
1339
  "group_size": {
1340
+ "2": 64
1341
  },
1342
  "bits": [
1343
+ 2
1344
  ],
1345
  "bits_prop": [
1346
  1
 
1361
  },
1362
  "o_proj": {
1363
  "group_size": {
1364
+ "2": 64
1365
  },
1366
  "bits": [
1367
+ 2
1368
  ],
1369
  "bits_prop": [
1370
  1
 
1409
  }
1410
  },
1411
  "model.layers.16": {
1412
+ "accuracy": 0.9254007339477539,
1413
+ "total_bits": 615009024,
1414
  "q_proj": {
1415
  "group_size": {
1416
+ "2": 64
1417
  },
1418
  "bits": [
1419
+ 2
1420
  ],
1421
  "bits_prop": [
1422
  1
 
1497
  }
1498
  },
1499
  "model.layers.17": {
1500
+ "accuracy": 0.9273276329040527,
1501
+ "total_bits": 648037632,
1502
  "q_proj": {
1503
  "group_size": {
1504
+ "4": 128
1505
  },
1506
  "bits": [
1507
+ 4
1508
  ],
1509
  "bits_prop": [
1510
  1
 
1513
  },
1514
  "k_proj": {
1515
  "group_size": {
1516
+ "4": 128
1517
  },
1518
  "bits": [
1519
+ 4
1520
  ],
1521
  "bits_prop": [
1522
  1
 
1549
  },
1550
  "up_proj": {
1551
  "group_size": {
1552
+ "2": 64
1553
  },
1554
  "bits": [
1555
+ 2
1556
  ],
1557
  "bits_prop": [
1558
  1
 
1585
  }
1586
  },
1587
  "model.layers.18": {
1588
+ "accuracy": 0.9346868991851807,
1589
  "total_bits": 722356992,
1590
  "q_proj": {
1591
  "group_size": {
 
1673
  }
1674
  },
1675
  "model.layers.19": {
1676
+ "accuracy": 0.9401049613952637,
1677
+ "total_bits": 755385600,
1678
  "q_proj": {
1679
  "group_size": {
1680
+ "4": 128
1681
  },
1682
  "bits": [
1683
+ 4
1684
  ],
1685
  "bits_prop": [
1686
  1
 
1761
  }
1762
  },
1763
  "model.layers.20": {
1764
+ "accuracy": 0.9692505598068237,
1765
+ "total_bits": 846216960,
1766
  "q_proj": {
1767
  "group_size": {
1768
  "2": 64
 
1777
  },
1778
  "k_proj": {
1779
  "group_size": {
1780
+ "4": 128
1781
  },
1782
  "bits": [
1783
+ 4
1784
  ],
1785
  "bits_prop": [
1786
  1
 
1825
  },
1826
  "gate_proj": {
1827
  "group_size": {
1828
+ "4": 128
1829
  },
1830
  "bits": [
1831
+ 4
1832
  ],
1833
  "bits_prop": [
1834
  1
 
1849
  }
1850
  },
1851
  "model.layers.21": {
1852
+ "accuracy": 0.9710770845413208,
1853
+ "total_bits": 846216960,
1854
  "q_proj": {
1855
  "group_size": {
1856
  "2": 64
 
1865
  },
1866
  "k_proj": {
1867
  "group_size": {
1868
+ "4": 128
1869
  },
1870
  "bits": [
1871
+ 4
1872
  ],
1873
  "bits_prop": [
1874
  1
 
1913
  },
1914
  "gate_proj": {
1915
  "group_size": {
1916
+ "4": 128
1917
  },
1918
  "bits": [
1919
+ 4
1920
  ],
1921
  "bits_prop": [
1922
  1
 
1937
  }
1938
  },
1939
  "model.layers.22": {
1940
+ "accuracy": 0.9737789630889893,
1941
+ "total_bits": 846216960,
1942
  "q_proj": {
1943
  "group_size": {
1944
  "2": 64
 
1953
  },
1954
  "k_proj": {
1955
  "group_size": {
1956
+ "4": 128
1957
  },
1958
  "bits": [
1959
+ 4
1960
  ],
1961
  "bits_prop": [
1962
  1
 
2001
  },
2002
  "gate_proj": {
2003
  "group_size": {
2004
+ "4": 128
2005
  },
2006
  "bits": [
2007
+ 4
2008
  ],
2009
  "bits_prop": [
2010
  1
 
2025
  }
2026
  },
2027
  "model.layers.23": {
2028
+ "accuracy": 0.9729764461517334,
2029
+ "total_bits": 846216960,
2030
  "q_proj": {
2031
  "group_size": {
2032
  "2": 64
 
2041
  },
2042
  "k_proj": {
2043
  "group_size": {
2044
+ "4": 128
2045
  },
2046
  "bits": [
2047
+ 4
2048
  ],
2049
  "bits_prop": [
2050
  1
 
2089
  },
2090
  "gate_proj": {
2091
  "group_size": {
2092
+ "4": 128
2093
  },
2094
  "bits": [
2095
+ 4
2096
  ],
2097
  "bits_prop": [
2098
  1
 
2113
  }
2114
  },
2115
  "model.layers.24": {
2116
+ "accuracy": 0.9737536907196045,
2117
+ "total_bits": 846216960,
2118
  "q_proj": {
2119
  "group_size": {
2120
  "2": 64
 
2129
  },
2130
  "k_proj": {
2131
  "group_size": {
2132
+ "4": 128
2133
  },
2134
  "bits": [
2135
+ 4
2136
  ],
2137
  "bits_prop": [
2138
  1
 
2177
  },
2178
  "gate_proj": {
2179
  "group_size": {
2180
+ "4": 128
2181
  },
2182
  "bits": [
2183
+ 4
2184
  ],
2185
  "bits_prop": [
2186
  1
 
2201
  }
2202
  },
2203
  "model.layers.25": {
2204
+ "accuracy": 0.9738945960998535,
2205
+ "total_bits": 846216960,
2206
  "q_proj": {
2207
  "group_size": {
2208
  "2": 64
 
2217
  },
2218
  "k_proj": {
2219
  "group_size": {
2220
+ "4": 128
2221
  },
2222
  "bits": [
2223
+ 4
2224
  ],
2225
  "bits_prop": [
2226
  1
 
2265
  },
2266
  "gate_proj": {
2267
  "group_size": {
2268
+ "4": 128
2269
  },
2270
  "bits": [
2271
+ 4
2272
  ],
2273
  "bits_prop": [
2274
  1
 
2289
  }
2290
  },
2291
  "model.layers.26": {
2292
+ "accuracy": 0.9777768850326538,
2293
+ "total_bits": 879245568,
2294
  "q_proj": {
2295
  "group_size": {
2296
+ "4": 128
2297
  },
2298
  "bits": [
2299
+ 4
2300
  ],
2301
  "bits_prop": [
2302
  1
 
2305
  },
2306
  "k_proj": {
2307
  "group_size": {
2308
+ "4": 128
2309
  },
2310
  "bits": [
2311
+ 4
2312
  ],
2313
  "bits_prop": [
2314
  1
 
2353
  },
2354
  "gate_proj": {
2355
  "group_size": {
2356
+ "4": 128
2357
  },
2358
  "bits": [
2359
+ 4
2360
  ],
2361
  "bits_prop": [
2362
  1
 
2377
  }
2378
  },
2379
  "model.layers.27": {
2380
+ "accuracy": 0.9767287969589233,
2381
+ "total_bits": 879245568,
2382
  "q_proj": {
2383
  "group_size": {
2384
+ "4": 128
2385
  },
2386
  "bits": [
2387
+ 4
2388
  ],
2389
  "bits_prop": [
2390
  1
 
2393
  },
2394
  "k_proj": {
2395
  "group_size": {
2396
+ "4": 128
2397
  },
2398
  "bits": [
2399
+ 4
2400
  ],
2401
  "bits_prop": [
2402
  1
 
2429
  },
2430
  "up_proj": {
2431
  "group_size": {
2432
+ "4": 128
2433
  },
2434
  "bits": [
2435
+ 4
2436
  ],
2437
  "bits_prop": [
2438
  1
 
2465
  }
2466
  },
2467
  "model.layers.28": {
2468
+ "accuracy": 0.9730557203292847,
2469
+ "total_bits": 879245568,
2470
  "q_proj": {
2471
  "group_size": {
2472
+ "4": 128
2473
  },
2474
  "bits": [
2475
+ 4
2476
  ],
2477
  "bits_prop": [
2478
  1
 
2481
  },
2482
  "k_proj": {
2483
  "group_size": {
2484
+ "4": 128
2485
  },
2486
  "bits": [
2487
+ 4
2488
  ],
2489
  "bits_prop": [
2490
  1
 
2529
  },
2530
  "gate_proj": {
2531
  "group_size": {
2532
+ "4": 128
2533
  },
2534
  "bits": [
2535
+ 4
2536
  ],
2537
  "bits_prop": [
2538
  1
 
2553
  }
2554
  },
2555
  "model.layers.29": {
2556
+ "accuracy": 0.9716944694519043,
2557
+ "total_bits": 879245568,
2558
  "q_proj": {
2559
  "group_size": {
2560
+ "4": 128
2561
  },
2562
  "bits": [
2563
+ 4
2564
  ],
2565
  "bits_prop": [
2566
  1
 
2569
  },
2570
  "k_proj": {
2571
  "group_size": {
2572
+ "4": 128
2573
  },
2574
  "bits": [
2575
+ 4
2576
  ],
2577
  "bits_prop": [
2578
  1
 
2605
  },
2606
  "up_proj": {
2607
  "group_size": {
2608
+ "4": 128
2609
  },
2610
  "bits": [
2611
+ 4
2612
  ],
2613
  "bits_prop": [
2614
  1
 
2641
  }
2642
  },
2643
  "model.layers.30": {
2644
+ "accuracy": 0.9692039489746094,
2645
+ "total_bits": 879245568,
2646
  "q_proj": {
2647
  "group_size": {
2648
+ "4": 128
2649
  },
2650
  "bits": [
2651
+ 4
2652
  ],
2653
  "bits_prop": [
2654
  1
 
2657
  },
2658
  "k_proj": {
2659
  "group_size": {
2660
+ "4": 128
2661
  },
2662
  "bits": [
2663
+ 4
2664
  ],
2665
  "bits_prop": [
2666
  1
 
2693
  },
2694
  "up_proj": {
2695
  "group_size": {
2696
+ "4": 128
2697
  },
2698
  "bits": [
2699
+ 4
2700
  ],
2701
  "bits_prop": [
2702
  1
 
2729
  }
2730
  },
2731
  "model.layers.31": {
2732
+ "accuracy": 0.9576601982116699,
2733
+ "total_bits": 879245568,
2734
  "q_proj": {
2735
  "group_size": {
2736
+ "4": 128
2737
  },
2738
  "bits": [
2739
+ 4
2740
  ],
2741
  "bits_prop": [
2742
  1