diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -1,439 +1,14 @@ { - "measurement": [ - { - "key": "model.layers.0.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011008086614310741, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.009480787441134453, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.004715380258858204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.005000735633075237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.005000532604753971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0019952079746872187, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.010630316101014614, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.009400513023138046, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.00518033979460597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.004573217127472162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.00477508595213294, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.004969432484358549, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.004572051111608744, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0026976901572197676, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.0020599854178726673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.002617002697661519, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.0018392560305073857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.001555339666083455, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.0017839501379057765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0015046523185446858, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0017111103516072035, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.0017837896011769772, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.001352182705886662, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0014835763722658157, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011008086614310741, - "qparams": { - "group_size": 32, + "measurement": { + "model.layers.0.self_attn": [ + { + "accuracy": 0.905276358127594, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -443,15 +18,12 @@ 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.011008086614310741, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -461,443 +33,12 @@ 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011252800934016705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.009727324359118938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.004757750313729048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.005007198546081781, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0050069489516317844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0019285704474896193, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.011200709268450737, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.009618634358048439, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.0051782685332000256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.004547890275716782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.004752656910568476, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.00500953895971179, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.004544542636722326, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0026500916574150324, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0019413988338783383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.002609242917969823, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0016849798848852515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0013510312419384718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0016216388903558254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0012880872236564755, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0016293596709147096, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0016213101334869862, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0012313327752053738, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0012620965717360377, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011252800934016705, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -907,15 +48,12 @@ 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.011252800934016705, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 @@ -926,4167 +64,127 @@ ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.0.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.1179886981844902, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07187287509441376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.04420781880617142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05088876187801361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.05083818733692169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.02453853376209736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.07954216748476028, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0659370943903923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.05568453297019005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.032017771154642105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03831000626087189, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.043213386088609695, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.03167303279042244, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.024733329191803932, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02282278798520565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.022219255566596985, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.013116978108882904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011273865588009357, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009573610499501228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008029025979340076, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.011370216496288776, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.00951514020562172, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007355750072747469, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00623891269788146, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.032017771154642105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.032017771154642105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1123935654759407, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.07292488217353821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.04752170667052269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05054657906293869, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04894748702645302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.026086676865816116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07282362133264542, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06508342176675797, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.053403209894895554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03224589303135872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.034712012857198715, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03723453730344772, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03149037063121796, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024397484958171844, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.022450661286711693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.018828794360160828, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01404129434376955, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012704739347100258, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011327584274113178, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010027693584561348, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01046806015074253, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.011223881505429745, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008222579024732113, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.008776634000241756, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03224589303135872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03224589303135872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.11428822576999664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.10680456459522247, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.10441695153713226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.09558585286140442, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.05169820040464401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.04957433044910431, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.057565875351428986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0530671551823616, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.052166201174259186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.047037091106176376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.04541381448507309, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.029266206547617912, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.025489740073680878, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.024895552545785904, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.024759845808148384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.014726360328495502, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.013439776375889778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01330955047160387, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.012601444497704506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.012517012655735016, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.008250698447227478, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.008979413658380508, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.008076385594904423, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00697943102568388, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.029266206547617912, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.029266206547617912, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1365624964237213, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.12912987172603607, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.12676769495010376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11611566692590714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06209452077746391, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06002208590507507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.06858552992343903, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.06337832659482956, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06252980977296829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.056896764785051346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.054752517491579056, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0345861092209816, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.030027201399207115, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.029494961723685265, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.029362663626670837, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.017245711758732796, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.015117036178708076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.014985506422817707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.014105459675192833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.014023742638528347, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.009038952179253101, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.009028298780322075, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.008861254900693893, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0059475889429450035, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0345861092209816, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0345861092209816, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.07756949216127396, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.06662952899932861, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.06098438426852226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.054018594324588776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.03468063846230507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.03010287694633007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.04617941379547119, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.04011668637394905, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.03608940914273262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.02880309522151947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.02771691419184208, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.02264729142189026, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.019248928874731064, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.017140867188572884, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.016617724671959877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.011672048829495907, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.009798625484108925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.009562766179442406, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.008797382935881615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.0084944823756814, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.00700214272364974, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.007365803699940443, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.006399260833859444, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.006037040613591671, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.03608940914273262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.03608940914273262, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9225013852119446, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.019874876365065575, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.013585426844656467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.007735891733318567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.00843197014182806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.008182569406926632, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0036739413626492023, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.014163860119879246, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.01281207799911499, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.009323764592409134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.006247940473258495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.006709864363074303, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.007160904351621866, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.006114661693572998, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.004099807236343622, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.0034705004654824734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.0036104123573750257, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.0023682513274252415, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.0019723158329725266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.0020500104874372482, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.0016610488528385758, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0019725735764950514, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.002030044561251998, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.001334486179985106, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.001493388437665999, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.019874876365065575, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.019874876365065575, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01725795306265354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.012093580327928066, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.006607988849282265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.00721750408411026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.007033235859125853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0029313776176422834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.013011518865823746, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.011555345728993416, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.008028173819184303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.005576727446168661, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0060486746951937675, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.006530762650072575, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0054851919412612915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.00350525276735425, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.002851716009899974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0032781597692519426, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0019940542988479137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0016066553071141243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0017477767542004585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.0013522366061806679, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0017501648981124163, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0017335435841232538, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0010826997458934784, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0012200791388750076, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01725795306265354, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.01725795306265354, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9264937043190002, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ 3, 2 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.14239169657230377, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09505699574947357, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06875743716955185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06697525829076767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.06128355860710144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03653048351407051, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.08763709664344788, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.07928841561079025, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.06774573773145676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.041482388973236084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.042725760489702225, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.044640351086854935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.03791728988289833, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02974652871489525, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02747594378888607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.022380836308002472, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01571665145456791, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.013734154403209686, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011936167255043983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.010191005654633045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01158654224127531, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.011305367574095726, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.00867464765906334, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007329844869673252, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03653048351407051, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 3 + 3, + 2 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03653048351407051, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 6, + 4, 3 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.15714740753173828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.12757422029972076, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.11435312032699585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09585023671388626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.07240363210439682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.060205765068531036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09273844212293625, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08198265731334686, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.07474842667579651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.05332636833190918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.05090031400322914, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.04780793562531471, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03998832404613495, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.035709451884031296, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.034660205245018005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.0242025014013052, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01999175176024437, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01912822760641575, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01666872389614582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.015979060903191566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.013610182330012321, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014646978117525578, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.012231334112584591, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011686385609209538, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.035709451884031296, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.035709451884031296, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5, - 4 + 3, + 2 ], "bits_prop": [ 0.1, @@ -5094,9686 +192,496 @@ ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.1.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1656760573387146, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15635865926742554, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15353770554065704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13963083922863007, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.0769786685705185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07422534376382828, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08518023788928986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07841798663139343, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07750438898801804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06978286057710648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06711457669734955, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04373549669981003, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03806111961603165, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03744320198893547, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0373053178191185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02212887816131115, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020502232015132904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020343394950032234, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019203657284379005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.019117169082164764, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012628615833818913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.014005900360643864, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012459107674658298, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011213801801204681, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03806111961603165, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03744320198893547, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.956719696521759, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5, - 4 + 3, + 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2047884613275528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1942071169614792, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.19092969596385956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1740131825208664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09555835276842117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09233932942152023, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10543309152126312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09718145430088043, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09621072560548782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08684265613555908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08339712768793106, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05383151397109032, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04677797853946686, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0461047925055027, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04594189673662186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026967624202370644, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.024581704288721085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024394122883677483, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.022936098277568817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.022831851616501808, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.014768211171030998, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01599878817796707, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014556103385984898, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.012113755568861961, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026967624202370644, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.026967624202370644, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.01708594709634781, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.0167019534856081, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.006396118085831404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.006061194930225611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.005475408863276243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.003066959325224161, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.01727076806128025, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.01629881002008915, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.0055337632074952126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.005254710093140602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.004898046143352985, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.004765623714774847, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.00505808275192976, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.002604465465992689, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.002528575249016285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0024614613503217697, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.0023318396415561438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.0009456862462684512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.0023063248954713345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.0008774535381235182, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.0022868856322020292, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.002283188747242093, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.0007061843643896282, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.0007709024357609451, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.01708594709634781, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 3, - 2 + 4, + 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.01708594709634781, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9574673175811768, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 3, - 2 + 4, + 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.055726174265146255, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.04376351088285446, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0368381142616272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.03328895568847656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.024614162743091583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.01859113574028015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0340261235833168, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.030913932248950005, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.02629847265779972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.019058609381318092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.018832499161362648, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.017249906435608864, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.014754556119441986, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.011930531822144985, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.011178301647305489, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.008622060529887676, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.00633257208392024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.005772964563220739, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.005267673637717962, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.004742346238344908, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.004494877997785807, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.004507370293140411, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.003507588291540742, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.003027360187843442, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0368381142616272, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0368381142616272, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.057543814182281494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0432782880961895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03429792448878288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.03171512112021446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.024842090904712677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.01731954701244831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.036236803978681564, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.032770801335573196, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.027001377195119858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.018888643011450768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.019024500623345375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.018258584663271904, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.015616614371538162, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.012096265330910683, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.011114757508039474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009143443778157234, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.006452736910432577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.005746965762227774, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.005318652372807264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.004643811844289303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.004764900077134371, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004748496226966381, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.003558092750608921, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0031890724785625935, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03429792448878288, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.03429792448878288, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9579824805259705, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, - 2 + 3 ], "bits_prop": [ - 0.25, - 0.75 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.17535729706287384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.14598512649536133, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1338554471731186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.11836200952529907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08032560348510742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.06765421479940414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.09919237345457077, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.08969398587942123, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08362171053886414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.06329043209552765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.060523003339767456, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05038369446992874, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04295951873064041, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0386405847966671, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.03755888342857361, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02518906444311142, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01993013173341751, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.018928762525320053, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.016488047316670418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.015739239752292633, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013143296353518963, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01269336324185133, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01158301904797554, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00810936838388443, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0386405847966671, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.03755888342857361, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 6, - 4 + 4, + 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.16046418249607086, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.14225806295871735, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1329043209552765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.11382444202899933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.07488379627466202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0663616955280304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0936114639043808, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08424723893404007, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.076744444668293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06210482120513916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.05794394761323929, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.04881187155842781, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04120277985930443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0369732640683651, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.035922590643167496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.024919917806982994, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.020577557384967804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.019832070916891098, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.018260689452290535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.017629560083150864, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014343321323394775, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014937289990484715, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013030324131250381, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011839104816317558, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0369732640683651, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0369732640683651, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5, - 4 + 4, + 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.20635904371738434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1942535787820816, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.19044062495231628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.17251691222190857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09705071896314621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0930805429816246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10743898898363113, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09908481687307358, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09784035384654999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0869334489107132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.08293698728084564, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.054917216300964355, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04753895848989487, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04665078967809677, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04643860459327698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.027482960373163223, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02441168576478958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02417401410639286, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.022423071786761284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02229403518140316, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.014802736230194569, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015299234539270401, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.014515620656311512, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0109413368627429, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.027482960373163223, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.027482960373163223, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9678816795349121, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24200908839702606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22804591059684753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2237907350063324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20274972915649414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11369670927524567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.109160415828228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12548723816871643, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11592335999011993, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11458571255207062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10190894454717636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09707681834697723, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06384675204753876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05537153035402298, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0543905533850193, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05416255444288254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031883880496025085, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02791099064052105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02763889729976654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02553119696676731, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025387223809957504, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016723575070500374, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016673494130373, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016404900699853897, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010990110225975513, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031883880496025085, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031883880496025085, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.18646162748336792, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.16460826992988586, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.15596750378608704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.13818705081939697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.08434103429317474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.07611331343650818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.10028424859046936, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.0913621187210083, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.08691035956144333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07140965759754181, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.06773243099451065, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.050833217799663544, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04371114820241928, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04059164226055145, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.03983470797538757, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02552850730717182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.021460751071572304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.021090492606163025, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.01898851990699768, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.01850431226193905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.013875558972358704, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.014115937985479832, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.012850100174546242, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.010146813467144966, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02552850730717182, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02552850730717182, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9691770076751709, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.0432896614074707, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0361945778131485, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.032022759318351746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.02842177078127861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.01962217129766941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.01610889658331871, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.02611484006047249, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0237275380641222, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.020496200770139694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.015796858817338943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.015342436730861664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.013285323046147823, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.011350498534739017, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.009514008648693562, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.009032846428453922, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.0066605922766029835, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.005050791893154383, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.0047056893818080425, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.004317707382142544, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.003999331034719944, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00353656685911119, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.00353130791336298, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0029021049849689007, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0024309800937771797, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0361945778131485, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0361945778131485, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.043265096843242645, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.035279031842947006, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.02996564842760563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.02670791558921337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.019296495243906975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.01496174093335867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.02708551287651062, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.024535205215215683, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.020373551174998283, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.015334178693592548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.01514138001948595, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.013732567429542542, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.011706359684467316, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.00932629406452179, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.008673018775880337, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.00687676714733243, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.004889734089374542, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0044369446113705635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.004116814583539963, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.003676511812955141, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.003571151988580823, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.003455620724707842, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0027184304781258106, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0022179954685270786, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.035279031842947006, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.035279031842947006, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9727419018745422, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 3, - 2 + 4 ], "bits_prop": [ - 0.25, - 0.75 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.180687814950943, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.15344581007957458, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.14216534793376923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.12534737586975098, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08297022432088852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.07174941152334213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10035136342048645, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09197210520505905, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.08593010902404785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.06656701117753983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.06328283250331879, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05096811428666115, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0439530685544014, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.03983229026198387, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.038803696632385254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02550109103322029, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02039390243589878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.019472306594252586, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.017051976174116135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.016345463693141937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013106175698339939, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.012736363336443901, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01163308136165142, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.007917646318674088, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.038803696632385254, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 6, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.038803696632385254, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 6, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1774052381515503, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15738162398338318, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.14953048527240753, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1289142221212387, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08288279920816422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07535234093666077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09772436320781708, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0888654813170433, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08440102636814117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06784721463918686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0632987841963768, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.050286244601011276, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04314207285642624, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04048558324575424, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.039840906858444214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025387173518538475, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.022071612998843193, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021545285359025, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01927889510989189, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01889101043343544, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014127779752016068, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015242011286318302, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013302987441420555, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01187866646796465, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025387173518538475, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025387173518538475, - "qparams": { - "group_size": 128, + { + "accuracy": 0.974399983882904, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.21428772807121277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.20172958076000214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1977907419204712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.17942751944065094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10108758509159088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09690531343221664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.11192798614501953, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10324999690055847, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10192669928073883, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09053996205329895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0864163264632225, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05717093124985695, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.049462106078863144, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04850538447499275, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04828397557139397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02860250510275364, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.025171512737870216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.024916892871260643, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.023079246282577515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02294023334980011, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.015295959077775478, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.015473770909011364, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.0149886105209589, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01071632094681263, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02860250510275364, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02860250510275364, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.25095880031585693, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23646703362464905, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2320457398891449, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.21060945093631744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11843224614858627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11368003487586975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1311378926038742, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12086735665798187, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11946311593055725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10625918209552765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.1014060378074646, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06691620498895645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05782357230782509, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05675369128584862, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05649882182478905, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03346271440386772, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02918759547173977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028892045840620995, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02671726606786251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026557760313153267, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01778590679168701, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01753842458128929, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017429200932383537, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011677514761686325, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03346271440386772, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03346271440386772, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9787182807922363, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21070393919944763, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18713396787643433, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17817099392414093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1582850068807602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09596149623394012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08702981472015381, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11261302977800369, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1030813530087471, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09870881587266922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.0815805122256279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07732559740543365, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.0572211891412735, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04932600259780884, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04613226279616356, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04535973444581032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.028713418170809746, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02422979101538658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02385350875556469, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021447574719786644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020952804014086723, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.015458157286047935, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.015680409967899323, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014424173161387444, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011075858026742935, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.028713418170809746, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.028713418170809746, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.06060928478837013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.05133335664868355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.045697398483753204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.040760986506938934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.027565978467464447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.022865695878863335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.036897994577884674, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03327404707670212, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.028741121292114258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.022540362551808357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.021920837461948395, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.01879393681883812, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.015963181853294373, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.013390731997787952, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.012726576998829842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.009439364075660706, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.007166787050664425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.006698045413941145, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.0062110242433846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.005780763458460569, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005040057003498077, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005061996169388294, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.00414578290656209, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0035737149883061647, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.036897994577884674, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.036897994577884674, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.058346256613731384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.048162348568439484, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.04178956523537636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0374012365937233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.026040691882371902, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.020800041034817696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03611510619521141, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03243023157119751, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.027527421712875366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.02105175331234932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02069544419646263, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.018444441258907318, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.015526934526860714, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01264967955648899, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.011867701075971127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009267323650419712, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.006717116571962833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.006188718602061272, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.005754878744482994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005250188987702131, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0048539177514612675, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.004775653127580881, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0038027248810976744, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0032500966917723417, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0374012365937233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0374012365937233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 + 5, + 4 ], "bits_prop": [ - 0.1, - 0.4, - 0.5 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19744235277175903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1702762395143509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1595173478126526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14097589254379272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09094136953353882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08000816404819489, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1084456741809845, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09962529689073563, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09390757977962494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0742809921503067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07045426219701767, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05517864227294922, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.047613970935344696, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04368162527680397, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04268242418766022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027588021010160446, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.022474143654108047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.021581389009952545, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019076701253652573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.018399082124233246, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014291688799858093, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0140438387170434, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012883895076811314, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00894422922283411, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027588021010160446, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027588021010160446, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9801473617553711, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.17206625640392303, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.15267585217952728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.14543680846691132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.12393102049827576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08002860099077225, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07269425690174103, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.09563377499580383, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.08568979799747467, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08173643797636032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06607451289892197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06052728369832039, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.04912261664867401, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04176447540521622, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.03923546150326729, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03862182796001434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.024886757135391235, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02147124707698822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.020942697301506996, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.018874339759349823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018486352637410164, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.013961649499833584, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014908517710864544, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013167290017008781, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011690462939441204, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03862182796001434, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 6, + 5, 4 ], "bits_prop": [ @@ -14781,17 +689,14 @@ 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03862182796001434, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 6, + 5, 4 ], "bits_prop": [ @@ -14799,35947 +704,1950 @@ 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.19018860161304474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.17809708416461945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.17402686178684235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.15749730169773102, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08978208154439926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08556228876113892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10049739480018616, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09242699295282364, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09069548547267914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.079914391040802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07618986815214157, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05133349820971489, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04433472454547882, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.043151479214429855, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04287421703338623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025751233100891113, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.022458519786596298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.022174496203660965, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.020495660603046417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.020320914685726166, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013864587992429733, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013930989429354668, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.013476544991135597, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009701383300125599, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025751233100891113, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025751233100891113, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9809694290161133, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.25205501914024353, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2366967648267746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.23169328272342682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20988741517066956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11915455758571625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11384352296590805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1326175034046173, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12215891480445862, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.12029532343149185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10635297000408173, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10134588181972504, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06762821972370148, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05848423019051552, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05712828040122986, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05681443214416504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03385412320494652, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029378142207860947, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02902841754257679, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026772284880280495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02657180465757847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017945216968655586, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017667528241872787, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01750364527106285, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011698061600327492, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03385412320494652, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03385412320494652, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21543458104133606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1922985017299652, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1835690587759018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16365349292755127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09848685562610626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08979357779026031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11606602370738983, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10554132610559464, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10118244588375092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08440231531858444, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08007698506116867, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059125013649463654, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.0506744310259819, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04749642312526703, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04671819135546684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029755551367998123, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025155024603009224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024778049439191818, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02244914509356022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021963126957416534, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016178270801901817, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016525061801075935, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015108714811503887, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011993911117315292, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029755551367998123, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029755551367998123, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9824387431144714, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.07332396507263184, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06400083750486374, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.058489661663770676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0521303154528141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.033631693571805954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.02908443659543991, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04331424832344055, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.03938489779829979, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03474517911672592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.028156248852610588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.027132142335176468, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.021959150210022926, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.018834903836250305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.01628248766064644, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.015626680105924606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.011016516014933586, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.008620885200798512, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.008160081692039967, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007562675513327122, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.007142954505980015, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005847304128110409, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005898186005651951, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004961786791682243, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.004095521755516529, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03474517911672592, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 4, - 3 + 5, + 4 ], "bits_prop": [ - 0.05, - 0.95 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03474517911672592, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "5": 32 + }, "bits": [ - 4, - 3 + 5 ], "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06712266057729721, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05752161517739296, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.051533982157707214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.04579797759652138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03041881136596203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.02554282173514366, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.04029816389083862, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03672593832015991, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.031630292534828186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.02517213113605976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.024386843666434288, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.020454581826925278, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.017494991421699524, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.014656404964625835, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.013919126242399216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01021964754909277, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.007618405390530825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.007092737592756748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006571256555616856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.006071341224014759, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005282144527882338, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.005144122522324324, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004267506301403046, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0032715308479964733, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03672593832015991, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.03672593832015991, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.21488776803016663, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1911017894744873, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.18241919577121735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16169804334640503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09995093196630478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09081289172172546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11584263294935226, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1066051498055458, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10227485001087189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0838887095451355, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07927955687046051, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05886221304535866, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05091295391321182, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04787079617381096, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04714690148830414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029392991214990616, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024312622845172882, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.023607991635799408, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0209941603243351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020509278401732445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015098136849701405, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014533315785229206, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013986708596348763, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008768432773649693, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029392991214990616, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.029392991214990616, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9892516732215881, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19767117500305176, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1801137775182724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17317280173301697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15098661184310913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09236371517181396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08552493155002594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10716091096401215, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09805913269519806, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09394580125808716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07844267040491104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07352089881896973, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.055225808173418045, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04725612327456474, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.044651105999946594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04402231425046921, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027839886024594307, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02359805256128311, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02304759807884693, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.0208735428750515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020464356988668442, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015175994485616684, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015368616208434105, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014276575297117233, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011109618470072746, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027839886024594307, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027839886024594307, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.18408501148223877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.17255312204360962, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16861093044281006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.15253858268260956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08686607331037521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0827871710062027, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09693455696105957, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08950028568506241, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0877482146024704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07739926874637604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07367749512195587, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04938216134905815, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04283727705478668, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04166407883167267, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.041388534009456635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02470775879919529, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02146594226360321, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.021187379956245422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01954752393066883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01937275193631649, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013019920326769352, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013008092530071735, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012626564130187035, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008674965240061283, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02470775879919529, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02470775879919529, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9895769357681274, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2505965828895569, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23552702367305756, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.23059533536434174, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2091030329465866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11850132048130035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11330504715442657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13162846863269806, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12157455831766129, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11960314214229584, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10591572523117065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10089893639087677, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06703709065914154, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05811423808336258, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.056754905730485916, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05643143877387047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03350282832980156, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029048016294836998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.0287050548940897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026467420160770416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026263758540153503, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017505647614598274, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017275456339120865, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017055291682481766, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011156143620610237, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03350282832980156, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03350282832980156, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21274308860301971, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19182860851287842, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1832045465707779, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16390112042427063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09776193648576736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08945062011480331, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11601609736680984, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1052962988615036, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10019276291131973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08455131202936172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08073459565639496, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05920138955116272, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05052012950181961, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04707002267241478, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.046221714466810226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029877711087465286, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02477819286286831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02436230145394802, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022222744300961494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02168622985482216, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016257816925644875, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016137124970555305, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01506487000733614, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01145330723375082, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029877711087465286, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029877711087465286, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9910632371902466, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.06839777529239655, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.059341803193092346, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.05321309715509415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.04743611812591553, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.03127928823232651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.026392057538032532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.04209749400615692, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.037958260625600815, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.032409898936748505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.026126859709620476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.02539909817278385, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02137308567762375, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.01815277524292469, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.015170758590102196, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.014389093965291977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.010716482996940613, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.008060626685619354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.007523725740611553, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007071373052895069, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.006564543582499027, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005661726929247379, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005653662141412497, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.004591871984302998, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0038938596844673157, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.037958260625600815, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.037958260625600815, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06325162947177887, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05394313856959343, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.047364264726638794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.042228665202856064, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.028602302074432373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.02338532917201519, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.03936747834086418, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.035691894590854645, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.02979852631688118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.023626776412129402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.023106908425688744, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.019965289160609245, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.017033273354172707, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.013840028084814548, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.012976893223822117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.009978552348911762, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.007270153611898422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0066886181011796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006300569977611303, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.005733390338718891, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0052036442793905735, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.005098978988826275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004084066953510046, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0033414335921406746, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.035691894590854645, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.035691894590854645, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.1928456425666809, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17164906859397888, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16270801424980164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14451023936271667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08966043591499329, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08101359754800797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10643062740564346, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09750240296125412, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09178631752729416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07554522901773453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07162470370531082, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05404521897435188, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0465431734919548, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.042997829616069794, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.042118530720472336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0269667599350214, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021962104365229607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.021208610385656357, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01904800534248352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01845702715218067, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013857051730155945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.013461961410939693, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012543372809886932, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008324005641043186, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0269667599350214, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.0269667599350214, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.6.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19653408229351044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1734381765127182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16417720913887024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14270001649856567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09183163940906525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08196190744638443, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10928080976009369, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09887833893299103, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09378831088542938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07556942105293274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07054726034402847, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05627569556236267, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04780341684818268, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.044581640511751175, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.0437970906496048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02826836332678795, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.023766538128256798, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02303030900657177, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.020651226863265038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020147783681750298, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015315333381295204, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01586781069636345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0142465028911829, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011698782444000244, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02826836332678795, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.991538405418396, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02826836332678795, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17780907452106476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1669689565896988, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16319823265075684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14802955090999603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08401203155517578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08010852336883545, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09417697787284851, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08672436326742172, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08484075218439102, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07511534541845322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07175573706626892, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04811670631170273, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04155287891626358, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04034249484539032, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0400567427277565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02409239485859871, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02089439332485199, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020612036809325218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019097082316875458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01892070472240448, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012857337482273579, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01282582525163889, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01244988664984703, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008736170828342438, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02409239485859871, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02409239485859871, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.6.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24478907883167267, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.230355367064476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2254604995250702, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20470291376113892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11584290117025375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11073841154575348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12926092743873596, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11907630413770676, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11689663678407669, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10368651151657104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09915518015623093, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06593052297830582, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05698317661881447, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.055538516491651535, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05519680678844452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032988060265779495, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028539156541228294, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02818659134209156, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02606617659330368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02584858424961567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01743336021900177, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017162349075078964, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016953185200691223, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011306785978376865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032988060265779495, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9952467679977417, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032988060265779495, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21917787194252014, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19852879643440247, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19048908352851868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17045587301254272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10111018270254135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0932023674249649, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11902499943971634, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10781162232160568, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1035153791308403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08778620511293411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08379415422677994, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.060593340545892715, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.051765553653240204, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04872898384928703, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04800724983215332, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030448956415057182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025740234181284904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025375358760356903, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023163683712482452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02269570156931877, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016476023942232132, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01676851138472557, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015460778959095478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012081608176231384, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030448956415057182, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030448956415057182, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.7.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.07554368674755096, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06749670207500458, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.06062848120927811, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05399617925286293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0349104069173336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.02981255017220974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.047654349356889725, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.042982570827007294, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03580741584300995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.02989584393799305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.029159070923924446, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.024285199120640755, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02067190408706665, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.016987670212984085, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.01600530929863453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01218666136264801, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.009125988930463791, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.008510309271514416, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.008175395429134369, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.007579146418720484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.006464578676968813, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.006561424117535353, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0051626646891236305, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.004644515458494425, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03580741584300995, - "qparams": { - "group_size": 32, + ], + "model.layers.0.mlp": [ + { + "accuracy": 0.9069831967353821, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03580741584300995, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06635280698537827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05857096612453461, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.051400650292634964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.04564184695482254, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03034098632633686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.025086235255002975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.042668648064136505, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.038741324096918106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03125791251659393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0257805697619915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02520078606903553, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.021695906296372414, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.018587345257401466, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01468892302364111, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.013617862947285175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.010872524231672287, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.007753182202577591, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.00706142745912075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006870645564049482, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.006171957589685917, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0056626033037900925, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.005558387842029333, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004296841099858284, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0036187847144901752, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.038741324096918106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.038741324096918106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20841199159622192, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18820302188396454, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1800844371318817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15954726934432983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09739089012145996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08942878246307373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11402744054794312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10397350788116455, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09923850744962692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08277567476034164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0783529281616211, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.058019112795591354, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04967450723052025, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0467078872025013, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04599380120635033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028969258069992065, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023877833038568497, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.023221924901008606, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02086453139781952, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02039027214050293, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014938135631382465, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014486586675047874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013764490373432636, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009073258377611637, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028969258069992065, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028969258069992065, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9086425304412842, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20094101130962372, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.180295929312706, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17161133885383606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14846736192703247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09423413127660751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08573614805936813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11299263685941696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.1014738380908966, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09600657224655151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07829418778419495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0738634392619133, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05833221971988678, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04934130609035492, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04608738422393799, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04529133066534996, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029421748593449593, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02508246898651123, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.024413693696260452, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.022083373740315437, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.021590406075119972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016337333247065544, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017289521172642708, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015311822295188904, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013366665691137314, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029421748593449593, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029421748593449593, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.7.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1744139939546585, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16391253471374512, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16036200523376465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1454927772283554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08267004787921906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07889244705438614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09234588593244553, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08500533550977707, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08342545479536057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0738491415977478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07050777971744537, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.047257378697395325, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04084286466240883, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.039784688502550125, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03953477740287781, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023659896105527878, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020763680338859558, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02050960250198841, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01901278831064701, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01885848678648472, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012690265662968159, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01296035386621952, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01233974564820528, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009125372394919395, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023659896105527878, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9215539693832397, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023659896105527878, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2460818737745285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23162391781806946, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2268749177455902, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20600645244121552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11677784472703934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11173707991838455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1302361786365509, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11969833076000214, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11781907826662064, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10452838242053986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09985370934009552, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06662773340940475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05743898078799248, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05613122135400772, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.055818986147642136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03337240591645241, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02908724546432495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028759552165865898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02662345953285694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026427945122122765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01787346974015236, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017843013629317284, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01743674837052822, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.012249868363142014, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03337240591645241, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, "bits": [ - 5 + 5, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03337240591645241, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9255946278572083, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21275873482227325, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19214516878128052, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18391144275665283, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16504958271980286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09806504845619202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08992431312799454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11583350598812103, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10516547411680222, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10051722824573517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08503907918930054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0813075378537178, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05905900150537491, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05044557899236679, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04724324867129326, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04646120220422745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029657907783985138, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024866415187716484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024487484246492386, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022329967468976974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02183711715042591, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016030840575695038, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01614569127559662, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014963441528379917, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011476939544081688, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029657907783985138, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029657907783985138, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9625147581100464, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.07527665793895721, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.06670542061328888, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.06146029382944107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.05427602678537369, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.034715503454208374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.030406946316361427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.044016193598508835, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04033699631690979, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03561458736658096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0291992649435997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.027919450774788857, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02234204299747944, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0192584078758955, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.01674753613770008, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.01610572077333927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01119381282478571, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.008779498748481274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.008319967426359653, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.007699012290686369, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.007279086858034134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.005890560336410999, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.005889228545129299, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.005027501843869686, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.003964065574109554, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03561458736658096, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ 4, 3 ], "bits_prop": [ - 0.05, - 0.95 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.03561458736658096, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, "bits": [ + 8, 4, 3 ], "bits_prop": [ 0.05, - 0.95 + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.06717831641435623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.05839461460709572, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.052945543080568314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.04661063849925995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03062683716416359, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.026146819815039635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.039889365434646606, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.036278821527957916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.031607650220394135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.02535308711230755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.024376945570111275, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.020235851407051086, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.017353475093841553, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.014750746078789234, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.014083271846175194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.010143283754587173, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00769382156431675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.007213674020022154, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.006654735188931227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.006203797645866871, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.00527538638561964, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0051860082894563675, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.004356185905635357, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003379376605153084, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.036278821527957916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.036278821527957916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.9668241739273071, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19314463436603546, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17203454673290253, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16380205750465393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.144192636013031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.08960946649312973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08139928430318832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10612846910953522, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09622719138860703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09157353639602661, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07515177130699158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07090512663125992, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.054053016006946564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04599130526185036, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.042990610003471375, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04224003478884697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026987910270690918, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.022108057513833046, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.021441500633955002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01918727345764637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.018691590055823326, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013979492709040642, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01369234174489975, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012740977108478546, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008846103213727474, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026987910270690918, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.026987910270690918, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20943227410316467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19106929004192352, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18354159593582153, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.16083793342113495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09805543720722198, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09083712100982666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11479770392179489, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10421435534954071, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09967539459466934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08350948244333267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07892828434705734, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05891565605998039, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.050152819603681564, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04737037047743797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04670950397849083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029547521844506264, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02490226924419403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02432716079056263, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.022037839516997337, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02162449061870575, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015752892941236496, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01607309654355049, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014848454855382442, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011457812041044235, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029547521844506264, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9735449552536011, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029547521844506264, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17245571315288544, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16214603185653687, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1585252434015274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1438879370689392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08176963031291962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0780564621090889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09163106977939606, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08422073721885681, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08255697041749954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07307165861129761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06995362043380737, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04690702632069588, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04042397812008858, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.039334189146757126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03907559812068939, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023477792739868164, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02048540860414505, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02022807113826275, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018749386072158813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01858755759894848, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012579739093780518, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01274801604449749, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012223192490637302, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0089015644043684, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03907559812068939, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 6, + 8, 4 ], "bits_prop": [ - 0.1, - 0.9 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03907559812068939, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9799370169639587, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 6, 4 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24270471930503845, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2283865511417389, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2235453575849533, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2028806358575821, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11520105600357056, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11006578803062439, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12864671647548676, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11821718513965607, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11627756804227829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10293669998645782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09841156005859375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0657714307308197, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.056657541543245316, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05529288202524185, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05497219040989876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0329027995467186, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02850194461643696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028155142441391945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026003943756222725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.0257995817810297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017418667674064636, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01727030798792839, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01695415750145912, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011573687195777893, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0329027995467186, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0329027995467186, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.8.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21623101830482483, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19576780498027802, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18801482021808624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16794219613075256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.0997619479894638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09201712906360626, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11670727282762527, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1061273068189621, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10210166871547699, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08641470223665237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08213528990745544, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059522595256567, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05095728114247322, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0480823740363121, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04738207906484604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030009092763066292, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025375960394740105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025031037628650665, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.0227985642850399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02235201932489872, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016415322199463844, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01650511473417282, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015436530113220215, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011869396083056927, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030009092763066292, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9814961552619934, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030009092763066292, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09608609974384308, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08547408878803253, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.07913125306367874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06987597793340683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.044392816722393036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.03913838043808937, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05624210834503174, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05106881260871887, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04545861482620239, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03743269667029381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03585197776556015, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.028607575222849846, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02448769472539425, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.021486859768629074, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.020732123404741287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014353308826684952, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.011420676484704018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.010883989743888378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010087992995977402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009602838195860386, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.0076724872924387455, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007805563043802977, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006627117283642292, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005530139431357384, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03743269667029381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03743269667029381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.9.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08430670201778412, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07414836436510086, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.0678352490067482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05975060164928436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03866790235042572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.033405303955078125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.049936771392822266, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04530227929353714, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.039754629135131836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03230985626578331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03108273819088936, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02535715140402317, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.021698608994483948, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.01860477589070797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.017840886488556862, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.012716904282569885, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.009708072990179062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009145548567175865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.0084616057574749, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00794716365635395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006617494393140078, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006506736855953932, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005514724645763636, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0042825136333703995, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03866790235042572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03866790235042572, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9813458919525146, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 4, - 3 + 5, + 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2037544846534729, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18460127711296082, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17754195630550385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15612494945526123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09534063935279846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08801572024822235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10974576324224472, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1005360409617424, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09683530777692795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08069334179162979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0758671686053276, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05578210577368736, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.047977183014154434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04560890421271324, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04503163322806358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027845464646816254, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023174399510025978, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02260124497115612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020134009420871735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01974155381321907, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014278631657361984, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01376587525010109, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013378800824284554, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008367578499019146, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027845464646816254, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.027845464646816254, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.9.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20644669234752655, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18663546442985535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1785900741815567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.15729950368404388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0967775210738182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08900425583124161, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11314134299755096, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10310288518667221, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09837757050991058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08159846812486649, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.077592633664608, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05808081477880478, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04951954260468483, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0466252863407135, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045943669974803925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029041454195976257, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024275682866573334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023680822923779488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021233249455690384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02077486738562584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015308545902371407, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015361121855676174, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01433885470032692, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010527445003390312, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029041454195976257, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9828542470932007, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029041454195976257, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1754981130361557, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16440768539905548, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1605767011642456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1453971415758133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08315734565258026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0791584849357605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09335049986839294, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08578440546989441, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08401191979646683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07397837936878204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07069601863622665, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04790041595697403, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04126038774847984, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04007669910788536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03980137035250664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024015316739678383, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.021007802337408066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020733626559376717, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01919548399746418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.019022420048713684, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012978872284293175, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013262704014778137, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012594575062394142, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009484064765274525, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024015316739678383, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024015316739678383, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9898855090141296, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24101674556732178, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22595550119876862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.220860093832016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20001374185085297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11422750353813171, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10882475972175598, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1279219537973404, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11744775623083115, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11538281291723251, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10160467028617859, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09709031134843826, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.065462127327919, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05634040758013725, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05487864464521408, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05454365909099579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279693424701691, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02844647876918316, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02808552235364914, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025905335322022438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.0256899856030941, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01755821891129017, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01746896468102932, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017068836838006973, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011976477690041065, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279693424701691, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03279693424701691, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.9.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22701133787631989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2053249031305313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19686128199100494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17508578300476074, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10486748814582825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0965324267745018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12298765778541565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11190734058618546, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10731525719165802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09042786806821823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08563072979450226, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.0625164806842804, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05367330089211464, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05048885941505432, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04972093552350998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031401168555021286, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.026527199894189835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02615193836390972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02372608706355095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.023234616965055466, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01694098673760891, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01712237298488617, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015891650691628456, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012127639725804329, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031401168555021286, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9899410009384155, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031401168555021286, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.08500514179468155, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.07598269730806351, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.070783831179142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.06198303773999214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.039426080882549286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.03507121279835701, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.049003150314092636, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04470982775092125, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04023090377449989, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03308471292257309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03143800050020218, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02489538863301277, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0213722363114357, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.018994484096765518, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.018401702865958214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.012463372200727463, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.009936477057635784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.009495084173977375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.008687918074429035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.00829395093023777, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.006548936013132334, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.00655133044347167, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.005706282332539558, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.004422148689627647, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03308471292257309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03308471292257309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.07853467017412186, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.06945265829563141, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06441891193389893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.05617726594209671, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03610844537615776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.031859803944826126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.04543802887201309, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04114576429128647, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03699994459748268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.030019691213965416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.02855815924704075, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02301236055791378, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.019668877124786377, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.017365220934152603, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.016768423840403557, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.011501869186758995, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00897891167551279, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.008539563976228237, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.007763924077153206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.007365227211266756, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.005960154812783003, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.005832303781062365, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005107438191771507, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.003744209185242653, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03699994459748268, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, "bits": [ - 4, - 3 + 8, + 6, + 5 ], "bits_prop": [ 0.05, - 0.95 + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.03699994459748268, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9928949475288391, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 4, - 3 + 6 ], "bits_prop": [ - 0.05, - 0.95 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.19163267314434052, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.17220568656921387, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.16496844589710236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.14403098821640015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0895463079214096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08207592368125916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.10455396771430969, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.09489274024963379, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09123793989419937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07489166408777237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07033657282590866, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.053257185965776443, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.045373573899269104, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04290549084544182, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.042316071689128876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02657959796488285, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.021949762478470802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02138330042362213, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01894567906856537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.018553033471107483, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.013727640733122826, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.013304928317666054, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.012712102383375168, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00841550063341856, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02657959796488285, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.02657959796488285, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.10.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20534829795360565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.186086043715477, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1779632717370987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1563439667224884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09590049833059311, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08817002177238464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11356530338525772, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10303518921136856, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09761801362037659, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08159290999174118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07725423574447632, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.058517128229141235, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.049680110067129135, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0464448481798172, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045668795704841614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029381396248936653, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024631667882204056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02398877777159214, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021836865693330765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.021348746493458748, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015931570902466774, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01623755507171154, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01488475501537323, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011818048544228077, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029381396248936653, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9935243129730225, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 - ], + 0.1, + 0.9 + ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.029381396248936653, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1737840324640274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16176047921180725, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15746758878231049, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14217577874660492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08214115351438522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07776112854480743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0927271693944931, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08517920970916748, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08311431109905243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07261231541633606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06925730407238007, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04750056192278862, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04094725102186203, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03958795964717865, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03926117718219757, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023827433586120605, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.0207760501652956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020475100725889206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01889915019273758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018698226660490036, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012834062799811363, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013185395859181881, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012376349419355392, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00943966768682003, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023827433586120605, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.15, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023827433586120605, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9943441152572632, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2334842085838318, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21761484444141388, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21216678619384766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1914486438035965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11039350926876068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10461396723985672, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12407109141349792, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11399441957473755, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.1116504818201065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09749306738376617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09289159625768661, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0634041503071785, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.054625723510980606, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.052976734936237335, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0525895357131958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03170878067612648, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02736521139740944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026971010491251945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02476441115140915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024519026279449463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016753027215600014, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01673191227018833, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016181733459234238, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011280654929578304, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03170878067612648, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5 + 8, + 6 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03170878067612648, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.10.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.23594635725021362, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21378962695598602, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.2053903490304947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.18213973939418793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10939012467861176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.10100501775741577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12785129249095917, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11609641462564468, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.11186075955629349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09431745111942291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08919801563024521, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06536872684955597, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05578269809484482, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0527440570294857, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05200546979904175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032861221581697464, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02780723385512829, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.027443306520581245, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.024882718920707703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.024411041289567947, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017777441069483757, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018017679452896118, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016751857474446297, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012920309789478779, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032861221581697464, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9949774742126465, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032861221581697464, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09879358112812042, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08787421882152557, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08192409574985504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07161916047334671, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.045941099524497986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04082772880792618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.056891582906246185, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05174879729747772, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04691252112388611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.038306474685668945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03634411096572876, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.028952352702617645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.024814987555146217, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02222251519560814, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021578757092356682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01451021246612072, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.011753874830901623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011265803128480911, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01027810201048851, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.009856726042926311, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00770616577938199, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007880574092268944, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.0067916493862867355, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00555853545665741, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.038306474685668945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.038306474685668945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.11.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08425219357013702, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07458037883043289, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.06852054595947266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.059681981801986694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03881143778562546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03392375260591507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.04947918653488159, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04503216594457626, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.039729438722133636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03227568417787552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.030749907717108727, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.025160863995552063, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.021537482738494873, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.018708404153585434, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01800595596432686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.012569473125040531, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00980303157120943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.00928080826997757, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.008515558205544949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008043590933084488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006605096161365509, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0065672555938363075, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005615833215415478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004407939501106739, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03881143778562546, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03881143778562546, - "qparams": { - "group_size": 32, + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.8897143602371216, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 4, - 3 + 3, + 2 ], "bits_prop": [ - 0.1, - 0.9 + 0.05, + 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20432120561599731, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18153199553489685, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17292287945747375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.15004882216453552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09499192237854004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08616924285888672, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11232376098632812, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10134810209274292, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09695269167423248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.07849019765853882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07336916029453278, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05727521330118179, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.048410844057798386, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0454891137778759, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04479750618338585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028671521693468094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023311074823141098, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.022648394107818604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.019924722611904144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01945810765028, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.014813917689025402, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014212366193532944, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013510520569980145, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009002615697681904, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028671521693468094, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028671521693468094, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20787794888019562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18826338648796082, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17364339530467987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1539831906557083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09773967415094376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08623389154672623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.128134623169899, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11503256857395172, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09974337369203568, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08388935029506683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08118048310279846, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06630077213048935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05566585436463356, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04767987132072449, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045652810484170914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03319788724184036, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.025647209957242012, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02430918626487255, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02299686335027218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.021712182089686394, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01777949184179306, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.0180820245295763, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015066210180521011, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013051002286374569, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03319788724184036, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03319788724184036, - "qparams": { - "group_size": 128, + { + "accuracy": 0.89801025390625, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1766756922006607, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16400784254074097, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15938597917556763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14374391734600067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08361907303333282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0788845345377922, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09499999135732651, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08695370703935623, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08466175943613052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07361457496881485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07030404359102249, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04877723753452301, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04193046689033508, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04041082412004471, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0400586873292923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024517806246876717, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.021399639546871185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.0210709348320961, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01945820078253746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01923525705933571, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013393944129347801, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013856660574674606, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01287431176751852, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01017990056425333, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024517806246876717, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024517806246876717, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.235495924949646, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2188098132610321, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21303442120552063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19198471307754517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11119252443313599, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10515542328357697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12555404007434845, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11513680964708328, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11253396421670914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09790126979351044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09319552779197693, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06400901824235916, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.055178917944431305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05337199196219444, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.052946824580430984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203866258263588, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027537353336811066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02711183950304985, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024848777800798416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02457546256482601, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016867291182279587, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01679241470992565, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016229091212153435, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011224563233554363, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203866258263588, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203866258263588, - "qparams": { - "group_size": 128, + { + "accuracy": 0.912111759185791, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.23754188418388367, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21541285514831543, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.2070802003145218, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1837923526763916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.110403873026371, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.10194620490074158, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12889111042022705, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11703121662139893, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1128508672118187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09524666517972946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.09009711444377899, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06596417725086212, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05630800127983093, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05327897146344185, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05253363028168678, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03317392244935036, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02810102514922619, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.027747632935643196, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.025166140869259834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.024703891947865486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017870474606752396, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018226496875286102, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016824204474687576, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013121445663273335, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03317392244935036, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03317392244935036, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10296568274497986, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09115573018789291, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08399426192045212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07347949594259262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04780259355902672, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0418749563395977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06181659549474716, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.05519252270460129, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.048945821821689606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.03989183157682419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03839432820677757, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03165465593338013, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0266332495957613, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02321992814540863, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.022366270422935486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01595565304160118, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012451705522835255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011848286725580692, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01094978116452694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010401507839560509, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008589660748839378, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.00865010917186737, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007322844583541155, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00625396054238081, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03839432820677757, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03839432820677757, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08977391570806503, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07904260605573654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.072329081594944, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06306516379117966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04125518724322319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03589830547571182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05352729931473732, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04808424040675163, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04233042150735855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.034230057150125504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.032733991742134094, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.027229562401771545, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.023052982985973358, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019993510097265244, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.019201243296265602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013662248849868774, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010553300380706787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.00999398808926344, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009183655492961407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00866365060210228, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0072051845490932465, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007191235199570656, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006076965015381575, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0049532558768987656, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.034230057150125504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.034230057150125504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20148691534996033, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18075372278690338, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17233291268348694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1506654918193817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0944347158074379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08614452183246613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11162907630205154, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10092511773109436, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09624972939491272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0789036899805069, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.07419832050800323, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.05692129209637642, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.04830602556467056, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04526880383491516, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0445733442902565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028435133397579193, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.023206911981105804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02253088913857937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.020009126514196396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.019504450261592865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01468837819993496, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014156476594507694, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013467486016452312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.008927283808588982, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028435133397579193, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.028435133397579193, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9347743988037109, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2226697951555252, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19894306361675262, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1894831657409668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.16768600046634674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10414952039718628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09456626325845718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12349046021699905, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11180208623409271, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10630743205547333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08772920072078705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08326394855976105, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06374773383140564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05402979999780655, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0505492128431797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.049708910286426544, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.032058171927928925, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.026831667870283127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026041824370622635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02361888997256756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02308187447488308, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01733775995671749, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01769978553056717, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016193794086575508, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012889813631772995, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.032058171927928925, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.032058171927928925, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.181032195687294, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16773715615272522, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1629449725151062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14682354032993317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08584447205066681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08089379221200943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09767010807991028, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08915817737579346, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08691753447055817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07532422244548798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07191907614469528, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05018544942140579, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.043113332241773605, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.041605424135923386, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04124923050403595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025274617597460747, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.022208401933312416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.021874960511922836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.020190255716443062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.019969720393419266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013978850096464157, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.014591606333851814, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.013491608202457428, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01096348650753498, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025274617597460747, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.025274617597460747, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9363901019096375, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2444602996110916, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2267690747976303, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22077032923698425, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19866417348384857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11580356955528259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10937059670686722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1303510069847107, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11970943212509155, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11719035357236862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.1015549823641777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.0966208428144455, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06672339886426926, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05745035782456398, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05568718910217285, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05527692660689354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0333840474486351, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028923412784934044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02848808281123638, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026061810553073883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025798778980970383, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017741631716489792, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017907796427607536, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017147138714790344, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.012348500080406666, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0333840474486351, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0333840474486351, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.23998847603797913, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21669529378414154, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.2068501114845276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.18316981196403503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.11118306964635849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.1018618792295456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.13208557665348053, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1199311763048172, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.11388948559761047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09553553909063339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.09033282101154327, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.0673409178853035, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.0576084665954113, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.053740110248327255, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.0527808777987957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0339440293610096, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.028476595878601074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.027996966615319252, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.025464728474617004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.024858688935637474, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.0184533279389143, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018769778311252594, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.017127573490142822, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01357610896229744, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0339440293610096, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0339440293610096, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9420623779296875, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10557393729686737, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09461566805839539, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0890842154622078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07803725451231003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04922997206449509, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04430793225765228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05974746122956276, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0545073077082634, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05016570910811424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04128348082304001, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.039104972034692764, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03039664216339588, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.026055706664919853, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02367919683456421, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023089969530701637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01521360594779253, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012312819249927998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011853615753352642, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01073114387691021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010350440628826618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007972387596964836, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007927943952381611, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007107818964868784, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005309431813657284, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.039104972034692764, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.039104972034692764, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08894427865743637, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.07967657595872879, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07423660159111023, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06499350816011429, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.041234683245420456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03674579784274101, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05131882056593895, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0467885360121727, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04205411672592163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03466475382447243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03301245719194412, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.026090772822499275, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.022367937490344048, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.019838394597172737, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.019230026751756668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013044154271483421, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010323403403162956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009865029715001583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009016916155815125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008602865971624851, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.006819312460720539, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006740171927958727, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0059456476010382175, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004441964905709028, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03466475382447243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03466475382447243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.21912993490695953, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1963253766298294, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1880096197128296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16409452259540558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10235955566167831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09363193809986115, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.11989269405603409, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10827194899320602, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10422743111848831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08528252691030502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08002869784832001, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06110893934965134, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.051684241741895676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04900303855538368, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04834257438778877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030511077493429184, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024931494146585464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02427067793905735, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.021427009254693985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02097170613706112, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015672337263822556, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.014919115230441093, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014421388506889343, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009176467545330524, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030511077493429184, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030511077493429184, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2406655102968216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.21969515085220337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.21111296117305756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.18945981562137604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.11274958401918411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.10452254861593246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.13150231540203094, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11995499581098557, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.11453171074390411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0972740650177002, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0936204195022583, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06770695000886917, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05787382274866104, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.054602619260549545, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.053819287568330765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033956192433834076, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.028933927416801453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.028289292007684708, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.025930186733603477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.025427082553505898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.018342113122344017, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.018998106941580772, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.017298854887485504, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01387108489871025, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033956192433834076, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033956192433834076, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9451758861541748, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.18581879138946533, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.17217960953712463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.16720445454120636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1506538987159729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08815453201532364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.08303235471248627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10020043700933456, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.09181342273950577, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08929210901260376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07741519063711166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07387197017669678, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05148079991340637, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04431536793708801, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04266169294714928, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.042268939316272736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02582291141152382, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.022612063214182854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02225719392299652, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.020522307604551315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02028128318488598, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.014001697301864624, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01467955857515335, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.013440107926726341, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01080747414380312, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02582291141152382, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02582291141152382, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2504037022590637, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23247285187244415, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22623185813426971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20359057188034058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11863415688276291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11201898008584976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13371782004833221, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.122747041285038, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.1200532466173172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.1040903627872467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09901810437440872, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06843864917755127, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05888133496046066, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05702219530940056, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.056583650410175323, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034268856048583984, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029523910954594612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02906818687915802, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02659722790122032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026314500719308853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.018215259537100792, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01815350353717804, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017569303512573242, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.012341786175966263, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034268856048583984, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034268856048583984, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9513201713562012, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2530127763748169, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2272283285856247, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.21749764680862427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1919483244419098, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.11728963255882263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.10734020173549652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.13781239092350006, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.12497010827064514, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.12023496627807617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09992140531539917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0942864641547203, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.07052940875291824, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.06000080704689026, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05649891495704651, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.055640675127506256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03546382859349251, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02962188981473446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.029217975214123726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02622607909142971, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.025682134553790092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.019107379019260406, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.019031066447496414, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.017927125096321106, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013395118527114391, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03546382859349251, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03546382859349251, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10931619256734848, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09822147339582443, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09172064810991287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08072160184383392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05097634717822075, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.045577362179756165, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06374190002679825, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.057728927582502365, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.051972635090351105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04310312494635582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.041227005422115326, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032431382685899734, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02767201140522957, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02463838830590248, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023867150768637657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01626035012304783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012988058850169182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012431623414158821, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011445966549217701, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010951527394354343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008577095344662666, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008671441115438938, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007473696023225784, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006016193423420191, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032431382685899734, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.032431382685899734, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.0893421322107315, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08063153922557831, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07363943010568619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06499633193016052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04139712080359459, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03623839095234871, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05443315580487251, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04942895472049713, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04218301549553871, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03533395379781723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03404676541686058, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.027540909126400948, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02359653078019619, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020002329722046852, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01906799152493477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.013785040937364101, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010470302775502205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009848620742559433, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00927443616092205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008674714714288712, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007224693428725004, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007108932826668024, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005987794138491154, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004712221212685108, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03533395379781723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03533395379781723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22852709889411926, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.20551161468029022, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19656485319137573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1720786690711975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10702250897884369, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09792590141296387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12720555067062378, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11372760683298111, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10888011753559113, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08977023512125015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0845288336277008, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06509263813495636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05452743172645569, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05130750313401222, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05054308846592903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032586194574832916, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.026329627260565758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.025639845058321953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02284286916255951, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02232953906059265, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01682763174176216, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01614866964519024, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015282305888831615, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010382777079939842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032586194574832916, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032586194574832916, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9640715718269348, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2554810345172882, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.2246054857969284, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.21225546300411224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.18715789914131165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.11901473999023438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.10736414045095444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.14327101409435272, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.12965384125709534, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.12254584580659866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.09910979121923447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0943002849817276, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.07376714050769806, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.06259872019290924, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05775994434952736, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.05656726658344269, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.036967240273952484, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.030691737309098244, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.029844090342521667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.026836303994059563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02608536183834076, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01978042535483837, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.020427294075489044, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.018220236524939537, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.014829336665570736, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.036967240273952484, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.036967240273952484, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17568466067314148, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16320890188217163, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15825451910495758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.14293111860752106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08353152871131897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07862897217273712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0955539271235466, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0875275656580925, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08459179848432541, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07364814728498459, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.07036525756120682, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04913231357932091, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.042303621768951416, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.04050152003765106, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04007105901837349, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024664906784892082, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.021564671769738197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.0211966373026371, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019657401368021965, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.019393853843212128, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013421114534139633, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01416593138128519, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01280425675213337, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010541923344135284, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024664906784892082, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.024664906784892082, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9649585485458374, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24776573479175568, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23083998262882233, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2246607542037964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2027682512998581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11768390983343124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11133717000484467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13301898539066315, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12207522243261337, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11902763694524765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10382270812988281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09887171536684036, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06811202317476273, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05861591175198555, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05663071200251579, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05616694316267967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03410555049777031, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029426950961351395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028974466025829315, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026650412008166313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026353484019637108, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.018199941143393517, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.018274467438459396, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017514191567897797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01260504312813282, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03410555049777031, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03410555049777031, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2391282320022583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21476605534553528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.20463138818740845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.18125399947166443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.11071785539388657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.10093177109956741, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.13240022957324982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11956994235515594, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.11350376904010773, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09470190852880478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08986657112836838, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06778758764266968, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.057624008506536484, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05375866964459419, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.052731387317180634, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034473955631256104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02880938909947872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02827424183487892, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02577541582286358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.025119448080658913, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.019108671694993973, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.019409283995628357, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.017598798498511314, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01440268661826849, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034473955631256104, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034473955631256104, - "qparams": { - "group_size": 128, + { + "accuracy": 0.972192108631134, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11914113909006119, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10793805867433548, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10125484317541122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0896863043308258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05574287101626396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.05021871253848076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06965204328298569, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06276470422744751, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05672720819711685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04767656698822975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04564221575856209, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03552639111876488, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030155155807733536, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.027010060846805573, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02621525526046753, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01792081445455551, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.014345195144414902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013782434165477753, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012792298570275307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012290346436202526, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009578531607985497, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009690779261291027, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008391131646931171, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006912411656230688, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03552639111876488, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03552639111876488, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09157740324735641, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08296307176351547, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07545457035303116, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06695849448442459, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04239138215780258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0368819534778595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05688539892435074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05129620060324669, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04321702942252159, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03657715767621994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0355718694627285, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.028993260115385056, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.024598293006420135, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020500799641013145, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.019412852823734283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014527834951877594, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010761290788650513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010056974366307259, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009619710966944695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008925262838602066, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007600767072290182, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.0074318484403193, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006154859904199839, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0049225762486457825, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03657715767621994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03657715767621994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2469998598098755, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2266165018081665, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2181740552186966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1946159154176712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.1167704239487648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.10852570086717606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13693274557590485, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12367437034845352, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11858733743429184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.10082067549228668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09590807557106018, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06998415291309357, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05921905115246773, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05603400617837906, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05528515204787254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03497388958930969, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.028623634949326515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02794903889298439, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.025334522128105164, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.024836955592036247, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01802448369562626, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.017282698303461075, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.016704048961400986, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010835245251655579, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03497388958930969, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03497388958930969, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.15.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.23110757768154144, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.20842894911766052, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1998419165611267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.17127303779125214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10815989226102829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09940153360366821, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12608468532562256, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11431089043617249, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10980713367462158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08936914801597595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0831465944647789, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06476783007383347, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.054881807416677475, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.052096497267484665, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.05141732469201088, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03244409337639809, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.027228111401200294, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026584496721625328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.023501133546233177, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02305145002901554, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.017284519970417023, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01730262115597725, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016356565058231354, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012088604271411896, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03244409337639809, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9758084416389465, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03244409337639809, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16949418187141418, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1581292599439621, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1536615490913391, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13898685574531555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.0806059017777443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0762069970369339, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09181762486696243, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08415795862674713, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08157846331596375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07148353010416031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06839318573474884, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0471833199262619, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04072045534849167, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03911473602056503, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03874078392982483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.023742901161313057, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020907165482640266, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020578952506184578, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.019150694832205772, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018920738250017166, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.013040225021541119, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013804890215396881, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012513676658272743, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010380743071436882, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03911473602056503, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03874078392982483, - "qparams": { - "group_size": 32, + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 6, + 5, 4 ], "bits_prop": [ @@ -50747,52516 +2655,59785 @@ 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2500014305114746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23398564755916595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22824740409851074, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2064998745918274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11866919696331024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11275531351566315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1334078311920166, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12265656888484955, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11990947276353836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10531645268201828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10040820389986038, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06809842586517334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05874885991215706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.056935232132673264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05651112645864487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03405534103512764, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029269538819789886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028847789391875267, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026541301980614662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026268616318702698, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017896121367812157, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017662253230810165, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01726895198225975, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011614006944000721, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03405534103512764, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03405534103512764, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9738282561302185, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2367853820323944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2123509794473648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.2025453895330429, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.18011145293712616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10928981751203537, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09956841170787811, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12945793569087982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11766182631254196, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.1121993288397789, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09364007413387299, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.088687002658844, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06601382791996002, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.056502990424633026, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05270889028906822, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.05177617818117142, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033269282430410385, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.027784476056694984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02733169123530388, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02474955841898918, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.024155044928193092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.018095986917614937, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01812523789703846, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016843410208821297, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012914570048451424, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033269282430410385, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033269282430410385, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11834802478551865, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10792248696088791, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1020299419760704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09064880013465881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05531793832778931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.0503663569688797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06739852577447891, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06144295632839203, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.056182876229286194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.047600965946912766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04546158015727997, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03428450971841812, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.029365908354520798, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02663436345756054, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.025952834635972977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017187630757689476, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013882406055927277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013374791480600834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01235094666481018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01190706342458725, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009048955515027046, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008992268703877926, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008054476231336594, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00605023093521595, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03428450971841812, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03428450971841812, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09501627832651138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08685987442731857, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07993567734956741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07128039002418518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04407282918691635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.039016395807266235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.057232316583395004, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05234524980187416, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04481889680027962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.038346994668245316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03693528473377228, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.02892414480447769, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.024955322965979576, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.021208539605140686, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.020253222435712814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01446161512285471, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011021004058420658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010373946279287338, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009879237972199917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009252224117517471, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007551117334514856, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007371633779257536, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006277545355260372, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00471838191151619, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.038346994668245316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.038346994668245316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2511385381221771, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.23221899569034576, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2247135192155838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.2014438956975937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11888016760349274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.11135436594486237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13859909772872925, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12482474744319916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.12048662453889847, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.10366364568471909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09881598502397537, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0709017664194107, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.059720154851675034, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05700049549341202, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05633757263422012, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03539317473769188, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02899172529578209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.028382157906889915, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.025814231485128403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.025370361283421516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01818607747554779, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.017232052981853485, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.016896240413188934, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010569900274276733, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03539317473769188, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03539317473769188, - "qparams": { - "group_size": 128, + { + "accuracy": 0.977654218673706, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.23944617807865143, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.21558116376399994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.2072986662387848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1841082125902176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.11134349554777145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.10260720551013947, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1290195733308792, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11764513701200485, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.11363252252340317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.09481875598430634, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08969483524560928, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06642700731754303, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05683420971035957, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05388619005680084, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.05319537967443466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033484067767858505, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.028606900945305824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.027937335893511772, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.025404315441846848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.024955756962299347, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.018327662721276283, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.018761619925498962, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01738305389881134, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013747748918831348, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033484067767858505, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033484067767858505, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15841388702392578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14798790216445923, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14364971220493317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13014429807662964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07528799027204514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07120206952095032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08616867661476135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07897844910621643, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07617884129285812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0669214278459549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06406179070472717, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04420936107635498, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03815596178174019, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.036487042903900146, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03609417751431465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022209614515304565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01943516731262207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019107885658740997, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017816895619034767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017577147111296654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.0120858084410429, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012772774323821068, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011531483381986618, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0094992620870471, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03815596178174019, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03815596178174019, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24213647842407227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2271447479724884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22158777713775635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20064841210842133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11496347934007645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10931061953306198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12939269840717316, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11900651454925537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.1161145269870758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.1022547036409378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09758130460977554, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0660577341914177, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05700846388936043, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.055154867470264435, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05472052842378616, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03301764279603958, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028370670974254608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027957795187830925, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025791719555854797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02551249973475933, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.0173339881002903, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017174609005451202, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016694044694304466, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011322307400405407, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03301764279603958, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03301764279603958, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9833866953849792, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22979648411273956, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2048628181219101, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19468148052692413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17305614054203033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10569482296705246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0956607535481453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12600305676460266, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11452724039554596, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10869695246219635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08995672315359116, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08544686436653137, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06429886817932129, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.0550084263086319, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05097885802388191, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04997537285089493, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032447583973407745, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.026841307058930397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026347676292061806, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023779284209012985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.023131385445594788, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017637183889746666, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017545830458402634, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01627376303076744, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01241123303771019, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032447583973407745, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.032447583973407745, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09849651902914047, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09016527980566025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08431802690029144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07549328356981277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04594464972615242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04143789783120155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05779410898685455, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.052673276513814926, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.04676736146211624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0400453545153141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.038565631955862045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02939782291650772, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0252103041857481, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02218778058886528, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.021430829539895058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01472943089902401, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.011675592511892319, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011154834181070328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.010517533868551254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010028105229139328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.007736934814602137, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.007825484499335289, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.006661816965788603, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.005358186550438404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.038565631955862045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.038565631955862045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.08499576896429062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0774114653468132, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07070089131593704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06342221051454544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.03920659422874451, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.034439194947481155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05207359045743942, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.04738237336277962, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.040042102336883545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03424961492419243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03324545547366142, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.026332544162869453, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02254757657647133, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.018918748944997787, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.017969779670238495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01316882111132145, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.00989509467035532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.0092734070494771, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.00892177876085043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008312605321407318, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.00686654495075345, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.006757815834134817, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.005587631370872259, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004416996613144875, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03424961492419243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03424961492419243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.20974422991275787, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.18967899680137634, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.17920267581939697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.161126971244812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.09741991758346558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08827729523181915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12109256535768509, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.10818429291248322, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.09972186386585236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08428186923265457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08095473051071167, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.061809055507183075, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05169229209423065, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.046735603362321854, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04551789537072182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030882805585861206, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024013318121433258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.023091329261660576, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.021326692774891853, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020504619926214218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.015876365825533867, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015116563998162746, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.013732590712606907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.00950623583048582, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030882805585861206, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.030882805585861206, - "qparams": { - "group_size": 128, + { + "accuracy": 0.987835168838501, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.23498935997486115, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.21175968647003174, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.20265182852745056, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.17787863314151764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10964806377887726, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.10037640482187271, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.12914538383483887, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11721386760473251, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.11185984313488007, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0928746685385704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08715848624706268, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06645327806472778, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0565902441740036, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.05313799902796745, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.05231606960296631, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033364083617925644, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.028203872963786125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.027432728558778763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02496558427810669, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.024435171857476234, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.018010716885328293, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01861351728439331, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01689978316426277, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013601641170680523, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033364083617925644, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.033364083617925644, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16213412582874298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15138638019561768, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1469610184431076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13332499563694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07687914371490479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0726698637008667, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08786111325025558, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08058027923107147, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0777885764837265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0684276670217514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06550870835781097, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04501479119062424, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03886253386735916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.0372004397213459, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.036798808723688126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022628726437687874, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019732080399990082, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019397348165512085, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01808891072869301, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01784422993659973, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012323870323598385, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012856220826506615, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011761381290853024, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.009455500170588493, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03886253386735916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03886253386735916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24751274287700653, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23216964304447174, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2265540063381195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20543670654296875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11739981174468994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11171188950538635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13217243552207947, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.1215185895562172, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11857613176107407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10467437654733658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09999014437198639, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0675513818860054, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05828841030597687, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.056434012949466705, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0559859424829483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.033898867666721344, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029255887493491173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028843242675065994, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026701102033257484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026418639346957207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.018083378672599792, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01805003546178341, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017444288358092308, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0123364869505167, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.033898867666721344, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.033898867666721344, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9849395155906677, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.24162714183330536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.21436099708080292, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.20337146520614624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17995351552963257, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.1110849604010582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.10019528865814209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.13342836499214172, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1205262616276741, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.114580899477005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09411433339118958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08919180929660797, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06808274239301682, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.057948559522628784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.053658030927181244, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.052597977221012115, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034353505820035934, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02838532067835331, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.027880266308784485, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.025102045387029648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02442467212677002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.018769631162285805, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018718652427196503, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.0173086766153574, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013438337482511997, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034353505820035934, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.034353505820035934, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1134033203125, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10431405156850815, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09813790023326874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08784269541501999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05312630906701088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04827982187271118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06671261042356491, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06026889756321907, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.053988534957170486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04650505632162094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04474928602576256, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03407623618841171, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0289633609354496, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025752972811460495, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02494891546666622, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017139529809355736, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013710903003811836, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013176226988434792, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012430978938937187, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011934518814086914, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009170987643301487, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009326168335974216, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.00799044780433178, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00667724572122097, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03407623618841171, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03407623618841171, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.18.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09455545246601105, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08599737286567688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07758664339780807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06972777843475342, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04365842789411545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03774106502532959, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05900606885552406, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0540115162730217, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04468056187033653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.038236431777477264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.037297237664461136, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029915139079093933, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.025793766602873802, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.021097905933856964, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01983974501490593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014966877177357674, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011066382750868797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010257336311042309, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009989673271775246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009188611060380936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007807972840964794, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007702371571213007, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006215110886842012, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004996858537197113, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.038236431777477264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03774106502532959, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9918481707572937, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 6, - 3 + 6 ], "bits_prop": [ - 0.2, - 0.8 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22774216532707214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2066277116537094, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19611692428588867, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17706343531608582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10658667981624603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09700095653533936, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13167425990104675, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11713089793920517, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10896463692188263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0924159586429596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08916713297367096, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06766202300786972, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.056078698486089706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0511941984295845, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05000034719705582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03390207141637802, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02627120167016983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02533709444105625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023363841697573662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022570904344320297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017460696399211884, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01638142019510269, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015230035409331322, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.01031755656003952, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03390207141637802, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03390207141637802, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20508883893489838, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18152397871017456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1729334443807602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14832565188407898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09506472200155258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08625737577676773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11182282119989395, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10147768259048462, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09691901504993439, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07783402502536774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07340507954359055, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05763671174645424, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04930641129612923, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0464017428457737, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04569590091705322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02913741022348404, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.025190584361553192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.024513280019164085, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021955439820885658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.021512728184461594, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016240933910012245, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017247209325432777, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01531683374196291, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013301326893270016, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02913741022348404, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02913741022348404, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9939449429512024, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.18.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15016479790210724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1401800662279129, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13589030504226685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12333641201257706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.0711287334561348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06712368130683899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08146357536315918, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07494176924228668, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07201537489891052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06331491470336914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.060598619282245636, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04171214997768402, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036087073385715485, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.034383442252874374, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03397276997566223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020920004695653915, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018146593123674393, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.017814677208662033, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.016617586836218834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016366170719265938, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011247726157307625, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011737983673810959, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010668868198990822, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008489351719617844, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036087073385715485, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.036087073385715485, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.9586436152458191, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9571729302406311, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9601583480834961, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602230191230774, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925339221954346, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938666224479675, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941341876983643, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971394538879395, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968786835670471, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976359605789185, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975365996360779, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998705267906189, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983726739883423, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988926649093628, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989797472953796, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990217685699463, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999188244342804, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.9931014776229858, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993735134601593, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945545792579651, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955971240997314, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963516592979431, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965707659721375, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974265694618225, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975751638412476, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976410865783691, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977284073829651, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998066782951355, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981653094291687, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998501181602478, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986154437065125, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991201162338257, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991366267204285, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992199540138245, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994032979011536, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995602965354919, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.9918777346611023, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992099404335022, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933364391326904, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937365651130676, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99604731798172, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963592886924744, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968618154525757, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979543685913086, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981528520584106, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979956746101379, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99825119972229, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9989842176437378, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999066174030304, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994069933891296, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994425177574158, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995215535163879, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9996846318244934, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9915900826454163, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919400811195374, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932268857955933, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946489930152893, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951656460762024, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955371022224426, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958428144454956, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962339997291565, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969381093978882, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970100522041321, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976981282234192, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979776740074158, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978837370872498, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980863332748413, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987605810165405, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998874843120575, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998869001865387, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992321729660034, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9994255900382996, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9879165291786194, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882405996322632, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900472164154053, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906170964241028, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940593242645264, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945236444473267, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952510595321655, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99693363904953, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972295761108398, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969890713691711, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973717927932739, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998477041721344, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986136555671692, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991227388381958, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991740584373474, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992906451225281, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9995541572570801, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9893314838409424, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896973371505737, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915299415588379, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993395984172821, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943023324012756, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948629140853882, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995294451713562, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960069060325623, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962986707687378, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963521361351013, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971089363098145, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975385665893555, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973883628845215, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978293180465698, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984217286109924, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987038969993591, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9985672831535339, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990589022636414, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9992339015007019, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9832739233970642, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837552905082703, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864385724067688, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872525930404663, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917817115783691, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99244624376297, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934935569763184, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957400560379028, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961544871330261, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958277344703674, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963674545288086, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978858828544617, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980775713920593, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987711906433105, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988454580307007, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990134239196777, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9993603825569153, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9854546785354614, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859250783920288, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879124164581299, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907240271568298, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925198554992676, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930697679519653, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942118525505066, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949459433555603, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995452344417572, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955569505691528, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966188669204712, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968560934066772, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997003972530365, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971993565559387, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981317520141602, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981817603111267, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984111785888672, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986324310302734, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9991827011108398, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9777228236198425, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783464670181274, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819249510765076, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829802513122559, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989036500453949, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899209141731262, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913098812103271, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943475127220154, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948858022689819, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944320917129517, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951605200767517, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971879720687866, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974560737609863, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983875155448914, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984742403030396, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986993074417114, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.999180793762207, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.9838966131210327, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845512509346008, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866582155227661, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899773001670837, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915726780891418, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922065138816833, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933969974517822, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941956400871277, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948878884315491, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951915740966797, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961211085319519, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963641166687012, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964805245399475, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966680407524109, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979742169380188, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979009628295898, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983076453208923, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983185529708862, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990394115447998, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9729506373405457, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9736834764480591, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781535267829895, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794583916664124, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866710901260376, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987760066986084, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894878268241882, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993111789226532, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937809705734253, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932279586791992, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941135048866272, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965791702270508, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968996047973633, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980359673500061, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998151957988739, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984371066093445, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9990161657333374, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.97994464635849, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807116985321045, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829738736152649, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873581528663635, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897590279579163, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900363087654114, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926924109458923, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930732846260071, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935360550880432, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939464926719666, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950830340385437, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995278537273407, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955549240112305, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958783388137817, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975118041038513, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975857138633728, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980112314224243, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9981045126914978, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.998754620552063, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.9679389595985413, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688228964805603, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739444255828857, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754302501678467, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841660261154175, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854965209960938, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874618053436279, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918500185012817, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926317930221558, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919378757476807, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930209517478943, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959290027618408, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963070750236511, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997646152973175, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977672696113586, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980841875076294, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9987619519233704, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.979193925857544, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799320697784424, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820204973220825, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867651462554932, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887349009513855, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989662230014801, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915907382965088, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928423166275024, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935099482536316, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939172863960266, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948223233222961, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950948357582092, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952493906021118, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99574214220047, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971402883529663, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974431991577148, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976405501365662, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980391263961792, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986903667449951, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9645816683769226, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655853509902954, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712508320808411, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728893637657166, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825301170349121, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839823842048645, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861629009246826, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990994393825531, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918584823608398, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911091327667236, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922904968261719, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955040216445923, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959192872047424, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974016547203064, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975313544273376, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978799223899841, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986280202865601, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9758711457252502, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765942096710205, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788267612457275, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837216138839722, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877364635467529, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882211685180664, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912638068199158, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918776750564575, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930226802825928, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929018616676331, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939651489257812, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943929314613342, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945930242538452, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949879050254822, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969010353088379, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997054398059845, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975370764732361, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977070093154907, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984327554702759, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9607599377632141, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618975520133972, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682748317718506, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701502323150635, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9805476665496826, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821865558624268, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847054481506348, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899374842643738, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909099340438843, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990075409412384, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914048910140991, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994977593421936, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954314827919006, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970723986625671, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972130060195923, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976070523262024, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9984055757522583, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9725799560546875, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735370874404907, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757736921310425, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813588261604309, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860193729400635, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866032004356384, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903626441955566, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911735653877258, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9918287992477417, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992388129234314, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932198524475098, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937389492988586, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940122365951538, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943007230758667, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965928792953491, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967969059944153, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973801374435425, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976330995559692, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9986262321472168, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.9586094617843628, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9598590731620789, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9669257402420044, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.969009518623352, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794173240661621, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811974763870239, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839715361595154, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892567992210388, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903696775436401, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894953966140747, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909318685531616, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946850538253784, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951795339584351, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968997836112976, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970772862434387, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975264668464661, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9983565211296082, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9670879244804382, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.968355119228363, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.971223771572113, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779627323150635, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826411604881287, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836264848709106, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872956275939941, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885817766189575, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899051189422607, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903085231781006, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917274117469788, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992371678352356, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926628470420837, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931877851486206, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956628680229187, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959269165992737, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99649977684021, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967559576034546, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980832934379578, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.955284059047699, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9566867351531982, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643339514732361, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665992259979248, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777832627296448, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979679524898529, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9827012419700623, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883722066879272, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895597696304321, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886635541915894, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901836514472961, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994249165058136, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947603940963745, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966254234313965, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968472719192505, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973378777503967, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982350468635559, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9617105722427368, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630410075187683, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9666846990585327, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735223054885864, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794604182243347, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801919460296631, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9843897223472595, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852853417396545, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865375757217407, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871546626091003, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900346994400024, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911723732948303, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909505844116211, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916436672210693, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948593974113464, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953621029853821, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957807064056396, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967799186706543, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982696175575256, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.9503874778747559, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9519993662834167, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600827097892761, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.962498664855957, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752492904663086, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97742760181427, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806284308433533, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987040638923645, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883536100387573, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873459935188293, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890672564506531, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935483932495117, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941152930259705, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961507320404053, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964143633842468, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9969189763069153, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979041218757629, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9616034030914307, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9630017280578613, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9663815498352051, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751613736152649, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801487922668457, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980673611164093, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9868801832199097, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875988960266113, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892485737800598, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899051785469055, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903819561004639, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991085410118103, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916311502456665, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922329187393188, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951127171516418, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995419442653656, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963069558143616, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964895844459534, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9980978965759277, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.946179986000061, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9478803873062134, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.956791341304779, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9595044255256653, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9730897545814514, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754396080970764, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789832234382629, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858712553977966, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873309135437012, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862549901008606, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881277084350586, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930375814437866, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936662912368774, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995908260345459, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961584806442261, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967352151870728, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9978141784667969, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9560854434967041, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579728841781616, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9621978998184204, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715605974197388, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768226146697998, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780182838439941, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9840877652168274, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858044385910034, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872329831123352, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987960934638977, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988861083984375, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896323084831238, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902233481407166, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910378456115723, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941582083702087, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948096871376038, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953693151473999, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961796402931213, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977453351020813, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.9417877197265625, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9435851573944092, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9529929757118225, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.955840528011322, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9710280299186707, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9735138416290283, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772362112998962, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846689105033875, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862210154533386, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851745367050171, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871383905410767, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924445152282715, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930325746536255, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954614043235779, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957907795906067, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963810443878174, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997502863407135, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.949327290058136, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9508466124534607, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9560520052909851, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.96513831615448, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9737013578414917, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974422812461853, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812223315238953, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821009039878845, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984283983707428, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847089648246765, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9863450527191162, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880592226982117, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883270859718323, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894152879714966, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927354454994202, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943436980247498, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938760995864868, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962157607078552, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965468049049377, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.9362263083457947, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9381191730499268, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.948416531085968, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.951540470123291, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682415127754211, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709169864654541, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750096797943115, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9833904504776001, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850209951400757, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837822914123535, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859458804130554, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917725324630737, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924939274787903, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951536059379578, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954444766044617, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960945844650269, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973757266998291, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9496893882751465, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9513397812843323, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9564887285232544, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9668439626693726, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743176102638245, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752395153045654, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830241203308105, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9842952489852905, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858899116516113, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9861603379249573, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875363111495972, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9882857203483582, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890918731689453, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897154569625854, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936967492103577, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940474033355713, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951485991477966, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955220222473145, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9977002739906311, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.9281431436538696, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9302598237991333, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.941763162612915, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9452990293502808, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642248153686523, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9672514200210571, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718483686447144, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981242835521698, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830756783485413, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817640781402588, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841746091842651, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907513856887817, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915482401847839, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945477247238159, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948880076408386, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956197738647461, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9970610737800598, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.9495404958724976, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9512749910354614, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9553503394126892, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9661189317703247, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733887314796448, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743125438690186, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813157916069031, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825544357299805, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846342206001282, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848491549491882, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870631694793701, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878540635108948, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884703755378723, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988736629486084, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930447936058044, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934720993041992, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943779706954956, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949839115142822, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971253871917725, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.9204462766647339, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.922762930393219, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9355055093765259, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9394989609718323, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602987766265869, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9636257886886597, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687509536743164, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979101836681366, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811719059944153, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797309041023254, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823935031890869, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896789789199829, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99055415391922, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938597083091736, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942805171012878, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950953125953674, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966567158699036, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9472419619560242, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9492505788803101, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9540770649909973, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9635224938392639, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.972008228302002, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727579355239868, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794401526451111, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803037643432617, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.98283851146698, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832983613014221, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859291315078735, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869702458381653, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873729944229126, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884445667266846, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903626441955566, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933878183364868, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912236332893372, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995164692401886, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963909387588501, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.9147318005561829, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9172266125679016, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9309174418449402, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9352928996086121, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9574783444404602, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9610078930854797, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665426015853882, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775072932243347, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797447323799133, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782940149307251, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811241030693054, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889386296272278, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898707866668701, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933951497077942, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938753247261047, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947614073753357, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9964168667793274, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9462279081344604, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9479885101318359, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9531593918800354, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642282724380493, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721946716308594, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9731534123420715, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9807776808738708, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820469617843628, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825072288513184, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982744038105011, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865202307701111, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873852133750916, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881358742713928, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989044725894928, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925220608711243, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933550953865051, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938499927520752, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994754433631897, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965014457702637, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.9135115146636963, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9160161018371582, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9296411871910095, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9340200424194336, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9568274617195129, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604782462120056, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966032862663269, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771766066551208, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9794261455535889, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779186248779297, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808058738708496, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886491298675537, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989617645740509, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930881857872009, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936510920524597, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945109486579895, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961335062980652, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.951574444770813, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9533187747001648, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9565252065658569, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9653555154800415, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9744277000427246, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760388731956482, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809256196022034, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829497337341309, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985108494758606, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855442643165588, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879812002182007, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9886623024940491, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892413020133972, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897162914276123, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934330582618713, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994057297706604, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945252537727356, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953657984733582, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974408149719238, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.9134874939918518, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9159051775932312, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9290798902511597, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9332295656204224, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9570102691650391, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604588747024536, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9658015370368958, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775006175041199, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796306490898132, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781208634376526, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980923593044281, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988882303237915, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898780584335327, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934699535369873, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938899874687195, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947423934936523, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965320229530334, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9579365253448486, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593603610992432, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.962439775466919, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9709607362747192, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768148064613342, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784423112869263, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825078845024109, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845938086509705, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865286350250244, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867554306983948, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989094614982605, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990001916885376, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901536703109741, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908937811851501, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937113523483276, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948618412017822, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945865273475647, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962268471717834, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973360300064087, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.9142164587974548, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9166189432144165, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9293040633201599, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9331812262535095, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575027823448181, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9608744978904724, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9659358859062195, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779309630393982, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799544215202332, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978387176990509, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811546206474304, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890344738960266, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900603294372559, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993633508682251, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940203428268433, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948253035545349, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967038631439209, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.self_attn": [ + { + "accuracy": 0.9632883071899414, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9650044441223145, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684069752693176, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9748721122741699, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788295030593872, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809853434562683, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829928278923035, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9856275320053101, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871335029602051, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873260855674744, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900816082954407, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908553957939148, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909263253211975, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916619658470154, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939812421798706, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955564737319946, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994598925113678, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996809184551239, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974508881568909, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.22.mlp": [ + { + "accuracy": 0.9141162633895874, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9164333343505859, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.92881840467453, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9326133131980896, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9573491215705872, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606796503067017, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9656444787979126, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778407216072083, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799044132232666, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783344268798828, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810835123062134, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890417456626892, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900781512260437, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936971068382263, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940574169158936, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948578476905823, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967986941337585, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.self_attn": [ + { + "accuracy": 0.9588616490364075, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600909948348999, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.963727593421936, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9704663753509521, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785836338996887, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797202944755554, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837622046470642, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853144884109497, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867044687271118, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865787625312805, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896817207336426, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9906028509140015, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907790422439575, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911060929298401, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943594932556152, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948871731758118, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951303601264954, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962123036384583, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976713061332703, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.23.mlp": [ + { + "accuracy": 0.9137842655181885, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9161549806594849, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9281762838363647, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9318503737449646, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9571259617805481, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9605122208595276, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9653484225273132, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777513146400452, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798051714897156, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978219211101532, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809853434562683, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889601469039917, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900136590003967, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936237335205078, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994016170501709, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947901964187622, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996774435043335, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.self_attn": [ + { + "accuracy": 0.9604657292366028, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9618702530860901, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9655463695526123, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9720821380615234, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785383343696594, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802395701408386, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837058186531067, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9859091639518738, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870471358299255, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875118732452393, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899595379829407, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907053709030151, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912053346633911, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991765558719635, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945439696311951, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995238721370697, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953223466873169, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963930249214172, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975873231887817, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.24.mlp": [ + { + "accuracy": 0.9144538640975952, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9169031977653503, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9286363124847412, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9321852922439575, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575070738792419, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609590172767639, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9656198024749756, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780130982398987, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800183773040771, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978425145149231, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9812023043632507, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890756607055664, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901465177536011, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936937093734741, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994040846824646, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947776794433594, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967461228370667, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.self_attn": [ + { + "accuracy": 0.959446370601654, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.961163341999054, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643687009811401, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711068868637085, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779682755470276, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795083999633789, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832007884979248, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853658676147461, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986575186252594, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.986462414264679, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895586371421814, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904395341873169, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908111691474915, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916126132011414, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943788647651672, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950695633888245, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995166540145874, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961365461349487, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976977705955505, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.25.mlp": [ + { + "accuracy": 0.9137674570083618, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9162902235984802, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9278544783592224, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9313666224479675, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9572494626045227, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9606402516365051, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9652101993560791, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777649641036987, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798120260238647, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782575368881226, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981042206287384, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889747500419617, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900212287902832, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935933351516724, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940627813339233, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947980046272278, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968298673629761, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.self_attn": [ + { + "accuracy": 0.960106372833252, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9617081880569458, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654035568237305, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9722895622253418, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785770773887634, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796422719955444, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9839475154876709, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9852089881896973, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9872868061065674, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987247884273529, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896945953369141, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904531240463257, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9908612370491028, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991355299949646, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941244125366211, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994990885257721, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948787689208984, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961293339729309, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975690841674805, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.26.mlp": [ + { + "accuracy": 0.9134653806686401, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.915968656539917, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9272154569625854, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9306942820549011, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9570654034614563, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604764580726624, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649356603622437, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776108264923096, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796403050422668, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781503677368164, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809421300888062, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888944029808044, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899412393569946, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934962391853333, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939254522323608, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946300387382507, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966022968292236, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.self_attn": [ + { + "accuracy": 0.9605412483215332, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9620692133903503, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9666094183921814, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733466506004333, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787718653678894, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799286723136902, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841371178627014, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855815768241882, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873479008674622, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877516031265259, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897288680076599, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990722119808197, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907761812210083, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917694330215454, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945033192634583, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952772855758667, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953537583351135, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996584415435791, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9973593950271606, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.27.mlp": [ + { + "accuracy": 0.91444993019104, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9170364737510681, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.92792809009552, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9313673377037048, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9575399160385132, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609099626541138, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9652475118637085, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9777263402938843, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797619581222534, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783930778503418, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811199307441711, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889929294586182, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989992082118988, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993497371673584, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939659833908081, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946502447128296, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965782165527344, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.self_attn": [ + { + "accuracy": 0.9536578059196472, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.955558180809021, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9600222706794739, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.968798041343689, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762941598892212, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772810935974121, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830692410469055, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844521284103394, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857773184776306, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865545630455017, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883972406387329, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890601634979248, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898623824119568, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903740286827087, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993773341178894, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942324161529541, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99479079246521, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954952001571655, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971802234649658, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.28.mlp": [ + { + "accuracy": 0.9128372669219971, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9155550003051758, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9265547394752502, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9301450252532959, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9567434191703796, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9602177739143372, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646229147911072, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770823121070862, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792038798332214, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779440760612488, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980739414691925, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887164831161499, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896910190582275, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931918382644653, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993688702583313, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943700432777405, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962068796157837, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.self_attn": [ + { + "accuracy": 0.9396234154701233, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9413683414459229, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9491278529167175, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9596350193023682, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9701108932495117, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707603454589844, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9795035123825073, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980247437953949, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9811848402023315, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823737144470215, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848869442939758, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847090840339661, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854401350021362, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9874797463417053, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992760956287384, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928350448608398, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942266941070557, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948742985725403, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975103139877319, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.29.mlp": [ + { + "accuracy": 0.9127777814865112, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9155042767524719, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9266650080680847, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9302945137023926, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9563784599304199, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9598891139030457, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9645258784294128, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976826548576355, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9789323806762695, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776586294174194, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980413019657135, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9885505437850952, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892295002937317, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929922819137573, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993586540222168, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942957758903503, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960763454437256, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.self_attn": [ + { + "accuracy": 0.9334838390350342, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9362443685531616, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9473037719726562, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9579559564590454, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966972827911377, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693659543991089, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754902124404907, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784017205238342, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980220377445221, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808425903320312, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9847986698150635, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851189255714417, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864095449447632, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871166348457336, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9919771552085876, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931991100311279, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932018518447876, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957214593887329, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9974281787872314, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.30.mlp": [ + { + "accuracy": 0.9020891785621643, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9050162434577942, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9176779985427856, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9216998815536499, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9506426453590393, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9545009136199951, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597950577735901, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.973039984703064, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9755874872207642, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974057137966156, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770920276641846, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.18.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23560285568237305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.22096383571624756, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21543966233730316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19531461596488953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11164174228906631, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10611743479967117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1260228008031845, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11595653742551804, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11283308267593384, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09961267560720444, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09504253417253494, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06432482600212097, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05559359863400459, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05362987518310547, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.053165800869464874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03223453089594841, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027724744752049446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027301473543047905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025290189310908318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024992385879158974, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017070962116122246, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01699807308614254, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016389627009630203, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011426583863794804, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03223453089594841, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9865967035293579, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03223453089594841, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9869742393493652, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22932279109954834, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.20246820151805878, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1909995824098587, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16875828802585602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10530436038970947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09423814713954926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12806963920593262, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11552858352661133, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10875292122364044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08875355124473572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08447183668613434, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06553970277309418, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05570965260267258, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05100630968809128, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04984699562191963, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033231087028980255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.027283521369099617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026716677471995354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.0241440087556839, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02340027503669262, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01841479167342186, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.018450286239385605, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01681932434439659, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.013565902598202229, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033231087028980255, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.033231087028980255, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9912995100021362, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.19.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11396817862987518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10528426617383957, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09884271770715714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0890628919005394, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05337546393275261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.048502109944820404, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0672643855214119, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06107862666249275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05426878109574318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04713341221213341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04559051990509033, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03435263782739639, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0292690210044384, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025784049183130264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024911992251873016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017212720587849617, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013590029440820217, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013007576577365398, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012374422512948513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011825807392597198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009155038744211197, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009128699079155922, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007925592362880707, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0063005480915308, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03435263782739639, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03435263782739639, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + { + "accuracy": 0.992099940776825, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.19.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09147840738296509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08401284366846085, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07525162398815155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06792691349983215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04219665750861168, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0364471860229969, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.05814434587955475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.053469035774469376, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.043066587299108505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037421733140945435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03670908510684967, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.029502494260668755, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02537667751312256, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02044646441936493, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.019106866791844368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.014778862707316875, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010753311216831207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009911486878991127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009814811870455742, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008973382413387299, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.007748804986476898, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007595078554004431, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0060561359860002995, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.004954168107360601, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037421733140945435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037421733140945435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + { + "accuracy": 0.9928672909736633, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945233464241028, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.19.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.238096684217453, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.21803995966911316, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.20855367183685303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1882338970899582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11167824268341064, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.10298355668783188, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13501127064228058, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12096233665943146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11378046870231628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09758538752794266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09378048777580261, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06915497779846191, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05785024166107178, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05352773889899254, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.052494946867227554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034598518162965775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.0273052416741848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02648245170712471, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02440197579562664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.023711171001195908, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01771772839128971, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01664835959672928, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01570408046245575, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010204070247709751, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034598518162965775, - "qparams": { - "group_size": 128, + ], + "model.layers.31.self_attn": [ + { + "accuracy": 0.927497386932373, + "total_bits": 89141248, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9322388768196106, + "total_bits": 91697152, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9439810514450073, + "total_bits": 95234560, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9599300622940063, + "total_bits": 111748096, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034598518162965775, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2165856510400772, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19531875848770142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1880383938550949, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1655879020690918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10093660652637482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09320151805877686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11764410883188248, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.1057371124625206, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10251390933990479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08533772081136703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.08091210573911667, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.060522355139255524, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05150681734085083, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04932630807161331, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.048835452646017075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030595432966947556, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.026869572699069977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02634814940392971, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02397499606013298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.023667702451348305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.017091041430830956, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.018392756581306458, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01640268601477146, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.014423463493585587, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030595432966947556, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030595432966947556, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9659322500228882, + "total_bits": 132388864, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14373646676540375, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13440322875976562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1304660141468048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.11847411841154099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06806723773479462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06433354318141937, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0776711031794548, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07145927846431732, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0688907727599144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06064372882246971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05807119235396385, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03973531350493431, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034374333918094635, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03283986821770668, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03247145190834999, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.019926387816667557, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017262563109397888, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01696011982858181, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015800688415765762, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015576718375086784, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010713733732700348, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011050408706068993, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010198884643614292, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007896055467426777, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034374333918094635, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.034374333918094635, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.22957558929920197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21547116339206696, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2101588249206543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1909044235944748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10877019912004471, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10349796712398529, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12288442254066467, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11291780322790146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.10992506891489029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09721522778272629, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09305469691753387, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06271566450595856, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.054172296077013016, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.052299242466688156, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05186298117041588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0314735472202301, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027161797508597374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026761192828416824, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02484051324427128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024560851976275444, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016893090680241585, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.016828713938593864, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016271624714136124, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011544492095708847, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0314735472202301, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.0314735472202301, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2158072292804718, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18971088528633118, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17754331231117249, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.1565222293138504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09905649721622467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08764586597681046, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12205614149570465, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11049900203943253, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10233386605978012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08315172046422958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07923205941915512, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06251193583011627, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.053568314760923386, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04813642427325249, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.046801164746284485, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031848933547735214, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02602596953511238, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025337379425764084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02304757945239544, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02217230387032032, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01789283938705921, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01804814115166664, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016159923747181892, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01344848982989788, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031848933547735214, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031848933547735214, - "qparams": { - "group_size": 128, + { + "accuracy": 0.966603696346283, + "total_bits": 132455936, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11362796276807785, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10476888716220856, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.0969642698764801, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08789283782243729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05306685343384743, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04740219935774803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.069229356944561, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.062807098031044, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0540165901184082, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04703442007303238, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.045774661004543304, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03529078885912895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030077874660491943, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02572445571422577, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024600442498922348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017735207453370094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01364781241863966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012933396734297276, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01248316653072834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01179618202149868, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009443800896406174, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009433423168957233, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007907978259027004, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006571589037775993, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03529078885912895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03529078885912895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.0921689122915268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08393833786249161, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07342083007097244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06674665957689285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04237308353185654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.035399436950683594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.061067454516887665, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0556681826710701, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04342229664325714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037531230598688126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03708833456039429, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03085969388484955, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02651813067495823, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020527061074972153, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01888495683670044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.015438606962561607, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.01084985677152872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009825139306485653, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009901002049446106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008866907097399235, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.0080669354647398, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.007883530110120773, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.0060422783717513084, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005101976916193962, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037531230598688126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.037531230598688126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23413993418216705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2114434689283371, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.20034950971603394, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.1812375783920288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10890550911426544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09878281503915787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1348365843296051, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11997293680906296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11139725893735886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09432223439216614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09103567898273468, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06939771771430969, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.057319171726703644, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05228612199425697, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0510372668504715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034745119512081146, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02678781934082508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02580316551029682, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023790579289197922, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022958073765039444, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.01783915050327778, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01666460558772087, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015344521962106228, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010389380156993866, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034745119512081146, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.034745119512081146, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.23066669702529907, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.19920606911182404, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.18726421892642975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.1603328436613083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10729500651359558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09477052092552185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1268506646156311, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11577291786670685, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.1096307635307312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0847679153084755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07985208928585052, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.065435491502285, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.0558902770280838, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0520491823554039, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.051125891506671906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03291189670562744, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02777101658284664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026909643784165382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.023479962721467018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02285405434668064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01779889687895775, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01856057345867157, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016564566642045975, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.013674319721758366, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03291189670562744, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03291189670562744, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9776991605758667, + "total_bits": 169089024, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1485532522201538, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13935352861881256, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13572831451892853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12326061725616455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07037222385406494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06684733927249908, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07951859384775162, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07331951707601547, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0711417868733406, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06290644407272339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06010854244232178, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0405619740486145, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03520859032869339, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03387495130300522, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.033562071621418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02030816301703453, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017691228538751602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01741916686296463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.0162078607827425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016012270003557205, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010807600803673267, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01113481167703867, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010361677035689354, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007809889502823353, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03520859032869339, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03520859032869339, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.23631809651851654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.222255140542984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21719972789287567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1972542107105255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11181111633777618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10670308768749237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12540817260742188, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11549955606460571, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11294069141149521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10008951276540756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09560860693454742, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06393466889858246, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05529307574033737, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.053639087826013565, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05324132367968559, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203509375452995, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02757939323782921, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027201518416404724, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02518981508910656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02494010142982006, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016980553045868874, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01664607785642147, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016411038115620613, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.010965084657073021, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203509375452995, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03203509375452995, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21336324512958527, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18806280195713043, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17703531682491302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15646307170391083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09782399982213974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0872444361448288, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11825019121170044, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10761673003435135, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10092329233884811, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08208554983139038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07819324731826782, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06043928116559982, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05174577981233597, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.047243811190128326, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04613037034869194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0306093767285347, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02502012811601162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02445255219936371, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021986698731780052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02126178704202175, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016828343272209167, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016644828021526337, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015345161780714989, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011891290545463562, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0306093767285347, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.0306093767285347, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9788033962249756, + "total_bits": 169221632, + "q_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1087748184800148, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.0996377021074295, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09215744584798813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.0836109146475792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05076252296566963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04517727717757225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06616899371147156, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.059966642409563065, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.051791537553071976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.044785600155591965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04368341714143753, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033791813999414444, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028728513047099113, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024584995582699776, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023512959480285645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.016949476674199104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013018033467233181, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012334139086306095, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011857783421874046, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011203859001398087, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00904722698032856, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008966398425400257, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007589447777718306, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006206808146089315, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033791813999414444, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.033791813999414444, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09276153892278671, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08424203842878342, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07436823844909668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.06761875748634338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04281033203005791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.035986512899398804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06080431491136551, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0552309975028038, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04388817772269249, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03779663145542145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03740648925304413, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.030883409082889557, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.026447510346770287, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.020745253190398216, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01918047107756138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01541061419993639, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.010943888686597347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.009962611831724644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.009961159899830818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.008975762873888016, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008043177425861359, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.00787246786057949, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006060457322746515, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005109983030706644, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03779663145542145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.03779663145542145, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2240171581506729, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.20027466118335724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.1878354400396347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16946975886821747, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10348161309957504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09228029102087021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13126684725284576, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1168353259563446, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10634415596723557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.08908626437187195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08624960482120514, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0677431970834732, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0558772012591362, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.04966858774423599, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04810548946261406, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033941175788640976, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02544441446661949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.024273119866847992, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.022516345605254173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.021481750532984734, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017439130693674088, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.016110477969050407, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01449537742882967, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009919766336679459, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033941175788640976, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033941175788640976, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.21.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.2319677621126175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.20181916654109955, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.19097013771533966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.16114209592342377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.1071549504995346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09543783962726593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.125030979514122, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.11503221839666367, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10984554886817932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.08585154265165329, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07858827710151672, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06419889628887177, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.055259671062231064, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0516650453209877, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.0507882758975029, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03220231086015701, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.027113359421491623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.026278063654899597, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02294106036424637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02234433963894844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01724071614444256, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.017505459487438202, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.016034789383411407, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.012299149297177792, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03220231086015701, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9798865914344788, + "total_bits": 170671104, + "q_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03220231086015701, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.14525185525417328, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13654862344264984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13318641483783722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12102137506008148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.06885591149330139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.0655551478266716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07742844521999359, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07142996788024902, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.06956322491168976, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06163189560174942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.058964986354112625, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.03953561186790466, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03429649770259857, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03314146399497986, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.032866962254047394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.019814126193523407, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017259320244193077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01701478287577629, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.015816085040569305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015642961487174034, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010576553642749786, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010779019445180893, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01020046602934599, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.0075100865215063095, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03429649770259857, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03429649770259857, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24185307323932648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2277171015739441, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22282017767429352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20250071585178375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11455659568309784, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10944624990224838, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1278344988822937, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11797790974378586, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11564202606678009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10266052186489105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09810636192560196, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06519908457994461, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.056458525359630585, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05493622273206711, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05457767844200134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03260207921266556, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02822943776845932, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.027872774749994278, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.025802338495850563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025572815909981728, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017180386930704117, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01699347048997879, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016672471538186073, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011178615503013134, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03260207921266556, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "4": 64 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03260207921266556, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9798445701599121, + "total_bits": 173039616, + "q_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21106190979480743, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.18602249026298523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.17494799196720123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15562938153743744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09632829576730728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08583444356918335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1170273870229721, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10670015215873718, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09960673749446869, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08149532228708267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07777796685695648, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.059477902948856354, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05124852806329727, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04653557389974594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.045350801199674606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030171139165759087, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.024648375809192657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.024067556485533714, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021787235513329506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.021026581525802612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016630202531814575, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016445346176624298, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015100231394171715, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011703829281032085, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030171139165759087, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.030171139165759087, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10949655622243881, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09978652745485306, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09212015569210052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08359648287296295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0509566105902195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04515158012509346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06670692563056946, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06039930507540703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.052164942026138306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.044842589646577835, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04388097673654556, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034083712846040726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.028954647481441498, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0246939305216074, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02359963022172451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01714523509144783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01311969943344593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012405653484165668, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011935298331081867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011247660033404827, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009174017235636711, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009087850339710712, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007667668629437685, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006319434382021427, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034083712846040726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034083712846040726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10038182139396667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09104595333337784, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08036738634109497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07303890585899353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04634612426161766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03896251320838928, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06552433967590332, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05974701792001724, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04756060242652893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04083880037069321, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.040293704718351364, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.033295225352048874, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.028597412630915642, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.022476503625512123, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.020819105207920074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.016675276681780815, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011925945989787579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010879135690629482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010857679881155491, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009814152494072914, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008768235333263874, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008625807240605354, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006702836137264967, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005699779372662306, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03896251320838928, - "qparams": { - "group_size": 32, + }, + "o_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 6, - 3 + 4 ], "bits_prop": [ - 0.2, - 0.8 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03896251320838928, - "qparams": { - "group_size": 32, + { + "accuracy": 0.9834647178649902, + "total_bits": 174398976, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 6, - 3 + 5, + 4 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2196962684392929, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.1956259310245514, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.18191200494766235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.16431525349617004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10129500925540924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.08936693519353867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13028761744499207, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.11578310281038284, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10415330529212952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0867660790681839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08410904556512833, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06672608852386475, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05529877543449402, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.048619288951158524, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.0469202846288681, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033448994159698486, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.024960007518529892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02368614450097084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02200533077120781, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.020887790247797966, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017116054892539978, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.015968549996614456, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014116569422185421, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009885173290967941, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033448994159698486, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.033448994159698486, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.21724098920822144, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18762584030628204, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17635799944400787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14451506733894348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.10138412564992905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.09007278829813004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11993949115276337, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10949390381574631, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10381460189819336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.0786522626876831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07141612470149994, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06134748458862305, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05279923602938652, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0491875521838665, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04830586910247803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03077572025358677, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02613159455358982, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.025290369987487793, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.021767815575003624, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.02117391675710678, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.016620414331555367, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01732819713652134, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01542644016444683, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.01264149695634842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03077572025358677, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.03077572025358677, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9838358163833618, + "total_bits": 175225856, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15829813480377197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1487858146429062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14525215327739716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13199284672737122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07492004334926605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07138226926326752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08410826325416565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07764451950788498, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.0756741613149643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06708335131406784, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06413595378398895, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04291786998510361, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03722308948636055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.035991474986076355, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.035695988684892654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021463783457875252, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018641816452145576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01837216690182686, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01706669107079506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016878917813301086, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011368026956915855, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01148437149822712, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010960269719362259, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007819004356861115, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03722308948636055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03722308948636055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24560487270355225, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23131820559501648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22641262412071228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2057906538248062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.1162545382976532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.1110982596874237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1295824646949768, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11960586905479431, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.117339126765728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10424374788999557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09957553446292877, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06603027880191803, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.057220518589019775, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.055690258741378784, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05533243343234062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032991085201501846, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.028525460511446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.0281608197838068, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02605529874563217, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.025825491175055504, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017272284254431725, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017030632123351097, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.016756068915128708, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011016173288226128, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032991085201501846, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.032991085201501846, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.21730296313762665, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1929323524236679, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.18213163316249847, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16228049993515015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09963767230510712, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08937249332666397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12060511112213135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10956240445375443, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10265921801328659, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08469048142433167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08077903836965561, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06126946210861206, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.052557773888111115, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.04805370420217514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.0469326451420784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031055044382810593, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025311460718512535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02475617825984955, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022401725873351097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02168901450932026, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017108937725424767, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016654307022690773, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015647923573851585, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01171050313860178, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031055044382810593, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.031055044382810593, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9855091571807861, + "total_bits": 178728960, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1114148423075676, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10206374526023865, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09485622495412827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08603402227163315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05193265900015831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04640425369143486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06704540550708771, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06075574830174446, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05303548648953438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04582754895091057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.044664617627859116, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03416885808110237, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02910158783197403, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02509387768805027, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02405870519578457, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017126204445958138, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013178206980228424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01251102052628994, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.011971802450716496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011334897950291634, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009085857309401035, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008903445675969124, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007664308417588472, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.00601270142942667, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03416885808110237, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03416885808110237, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09988868236541748, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09045198559761047, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08135899901390076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07375864684581757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0460214838385582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03956333175301552, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06280318647623062, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05740860477089882, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04721730202436447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04041936993598938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03961801528930664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03182412311434746, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02731063961982727, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02227248251438141, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.020907631143927574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01593686453998089, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011698780581355095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010838980786502361, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010603832080960274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009746305644512177, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008340641856193542, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008193779736757278, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006598442327231169, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005373301450163126, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03182412311434746, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03182412311434746, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.234558567404747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2125948667526245, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2015838325023651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.18212133646011353, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10905492305755615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09903600811958313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13355900347232819, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12043599784374237, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11153518408536911, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09472568333148956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09128672629594803, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06830521672964096, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05752931535243988, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05231732130050659, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05103752017021179, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03412536531686783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.02679145522415638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.025773731991648674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023844540119171143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02299155853688717, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017491063103079796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.016669195145368576, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01526228990405798, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.01034406665712595, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03412536531686783, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03412536531686783, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 64 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20622478425502777, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17783744633197784, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16725265979766846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.13983768224716187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0947675034403801, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08399287611246109, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.11216578632593155, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10274764895439148, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09759266674518585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07462172210216522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06849024444818497, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05737496539950371, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.049362216144800186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04575097933411598, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.044871505349874496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02874145843088627, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024076558649539948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023275233805179596, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02015773206949234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.019564993679523468, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015371706336736679, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01570102572441101, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014216779731214046, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011103863827884197, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02874145843088627, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02874145843088627, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9861674904823303, + "total_bits": 181067776, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16379249095916748, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15391550958156586, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15026332437992096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13653716444969177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07759186625480652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07392103224992752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08724522590637207, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08030951768159866, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07837392389774323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0694195032119751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06652117520570755, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04447275027632713, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03853161633014679, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.037304412573575974, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03701616823673248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022349338978528976, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01940048672258854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01912991888821125, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017765596508979797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.017584828659892082, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012040293775498867, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012052235193550587, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01164830382913351, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008335079066455364, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03853161633014679, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03853161633014679, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.24965551495552063, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2350577563047409, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.2299884408712387, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.2089596390724182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11829712241888046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11298355460166931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13182014226913452, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12169945985078812, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11940979957580566, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10591688752174377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10122379660606384, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06722857803106308, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.058238741010427475, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.056694600731134415, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05633120611310005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03362647071480751, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029079172760248184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02870907261967659, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026540128514170647, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.026314586400985718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01772097311913967, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017412295565009117, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017213920131325722, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.01134828943759203, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03362647071480751, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03362647071480751, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "5": 32 + }, "bits": [ 5 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2218073010444641, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.19675515592098236, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.185529425740242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16567932069301605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10164327919483185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09108199179172516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12238232791423798, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.1120428517460823, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10478640347719193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08646640181541443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08253860473632812, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06243458390235901, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05370816960930824, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.048992760479450226, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04781248793005943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03145384415984154, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.025740137323737144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025165606290102005, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.022783122956752777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022035978734493256, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016987495124340057, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016882557421922684, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015420621261000633, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011775771155953407, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03145384415984154, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03145384415984154, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9919095039367676, + "total_bits": 219944960, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11989356577396393, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.11031892150640488, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1025211364030838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09309450536966324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.056145500391721725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.050201788544654846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07193557918071747, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06563229113817215, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05717279762029648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.049662962555885315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04828052595257759, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03668467700481415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03149037808179855, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.027151722460985184, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02605314552783966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.018361501395702362, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.014337178319692612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013624282553792, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.013065779581665993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012386849150061607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009720604866743088, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009766318835318089, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008233063854277134, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006717348005622625, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03668467700481415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03668467700481415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10808537900447845, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09889411926269531, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08873887360095978, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0805787742137909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.05012969672679901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.04307720810174942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06911439448595047, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.06318000704050064, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.05126386135816574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0444214828312397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.04363585636019707, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.035067759454250336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.03016137145459652, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.024272147566080093, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02266235090792179, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.017559930682182312, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.012724274769425392, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011703436262905598, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011592337861657143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.01056984718888998, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.009207496419548988, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008949295617640018, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007206174544990063, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005736554507166147, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.035067759454250336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.035067759454250336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.24297447502613068, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.22251319885253906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.21288332343101501, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.19226576387882233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11366003006696701, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.1046648621559143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1365175098180771, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12309354543685913, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11584001034498215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.0995156541466713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09555310010910034, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06970035284757614, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05884004011750221, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05449175462126732, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.053430743515491486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03480111435055733, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.027820445597171783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02695314772427082, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02487262338399887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.024180373176932335, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017854349687695503, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01700173318386078, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015935122966766357, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010471353307366371, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03480111435055733, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03480111435055733, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1866590529680252, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1682603806257248, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1593337059020996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.13428473472595215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0877067893743515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07996589690446854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10467762500047684, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09597685188055038, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.08862774819135666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07188255339860916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06600542366504669, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05354732275009155, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04603121429681778, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.0422009602189064, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04126289486885071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026752645149827003, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.02189851924777031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021143147721886635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.0187393706291914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01809481345117092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014063738286495209, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01396218128502369, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.012753210961818695, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009278591722249985, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026752645149827003, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.026752645149827003, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9904629588127136, + "total_bits": 223010816, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16346412897109985, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.15361496806144714, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14998775720596313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.13623632490634918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07747533917427063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07380399852991104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08687001466751099, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.080144502222538, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07820606976747513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06925085932016373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06621238589286804, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0443425178527832, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03844350576400757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.037233173847198486, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03694150596857071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02219727262854576, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019313639029860497, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01903964765369892, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017652161419391632, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01748013310134411, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011812499724328518, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01192252617329359, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011427763849496841, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008167412132024765, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03844350576400757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03844350576400757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.25276437401771545, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.23795901238918304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.23286408185958862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.21162313222885132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11979498714208603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11446519196033478, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13395161926746368, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12316104769706726, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.12091419845819473, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10719039291143417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.1025933176279068, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06831041723489761, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0590071901679039, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.057478245347738266, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0571126751601696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03425443917512894, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029549716040492058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02917344868183136, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.026972953230142593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02674340456724167, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.018291587010025978, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017787059769034386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017797932028770447, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011737482622265816, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03425443917512894, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03425443917512894, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22584229707717896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.20061302185058594, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1895502209663391, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.16928337514400482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10336372256278992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09289396554231644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12435254454612732, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11359259486198425, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10661155730485916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08814983814954758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08409000188112259, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06301404535770416, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05432085692882538, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0496915802359581, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04854245483875275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03179468214511871, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02587498351931572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02530420757830143, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02286679483950138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.02211897075176239, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01720796898007393, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.01665363274514675, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01568584516644478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011239712126553059, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03179468214511871, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03179468214511871, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9934052228927612, + "total_bits": 252975104, + "q_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.12550987303256989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.11574572324752808, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10826405137777328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09818890690803528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.058871492743492126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.05317367613315582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0747869536280632, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06796778738498688, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05986027792096138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.05205460637807846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.05062102898955345, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03813527524471283, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03254168480634689, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.028397465124726295, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02734845131635666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.019093181937932968, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.014844918623566628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.014142239466309547, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01349018793553114, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012833037413656712, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.010025824420154095, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009868663735687733, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008536827750504017, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006576343439519405, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03813527524471283, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03813527524471283, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10840514302253723, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09943927079439163, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08988400548696518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.08161292970180511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.050377484411001205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.04363291338086128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06811995804309845, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.06253161281347275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.05138061195611954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04460929334163666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.043657053261995316, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.034533992409706116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.029876045882701874, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02429313398897648, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.022805994376540184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.017254292964935303, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.012674450874328613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011718736961483955, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011522620916366577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.010576660744845867, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008968164213001728, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008758768439292908, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007122151553630829, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005553002003580332, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.034533992409706116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.034533992409706116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.2445341944694519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.22428086400032043, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2147752195596695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.19412188231945038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11438918113708496, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.10552900284528732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1376810520887375, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12381111830472946, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11647208034992218, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.10026116669178009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09626638889312744, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0703100636601448, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.059143781661987305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.0548669807612896, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05377807095646858, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03503860533237457, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.027984922751784325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02709900587797165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.025055531412363052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.024365276098251343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.018026219680905342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.017075782641768456, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.016094371676445007, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.01046830229461193, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03503860533237457, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03503860533237457, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20123182237148285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17732585966587067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16871976852416992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.13681364059448242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.093834787607193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08515405654907227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1094697043299675, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09951674193143845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09575106203556061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07386009395122528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.0657212883234024, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05573827028274536, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.047748856246471405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.045132603496313095, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.0445009209215641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027975095435976982, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.023368846625089645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02278246358036995, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.019349519163370132, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01891559548676014, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014928346499800682, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014571098610758781, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014019438065588474, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009778370149433613, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027975095435976982, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027975095435976982, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9917826652526855, + "total_bits": 265314304, + "q_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16863466799259186, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1583430916070938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15461823344230652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1404763162136078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08000396192073822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07616619020700455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.0897020697593689, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08276297897100449, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08077382296323776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07144550234079361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06830079853534698, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04582437500357628, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03973127901554108, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.038474541157484055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.038175418972969055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02295256219804287, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020005859434604645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019727131351828575, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01829288713634014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018109621480107307, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012250443920493126, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012427224777638912, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011843627318739891, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00859972182661295, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.038474541157484055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.038175418972969055, - "qparams": { - "group_size": 32, + }, + "k_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 6, - 4 + 6 ], "bits_prop": [ - 0.1, - 0.9 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.25556546449661255, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2404772937297821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.23531976342201233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.21372133493423462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.12126534432172775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11580372601747513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13477525115013123, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12465653568506241, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.12237931787967682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10841426253318787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10341820120811462, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06867241859436035, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05963330343365669, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05808518826961517, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.0577278658747673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034296128898859024, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02971048653125763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.029332978650927544, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.027064386755228043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02683015540242195, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.017833668738603592, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01765921525657177, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017309248447418213, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011347132734954357, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034296128898859024, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 32 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.034296128898859024, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "6": 32 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.25.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22791875898838043, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.2022734433412552, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1905960738658905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17053423821926117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10425020009279251, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09339937567710876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12572240829467773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11538281291723251, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10762312263250351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08914969116449356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08505845814943314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06406519562005997, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05529705062508583, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05019103363156319, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.048929378390312195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03214721009135246, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.026285216212272644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025646818801760674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023321781307458878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022499876096844673, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017222454771399498, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017171110957860947, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.015548892319202423, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011782058514654636, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03214721009135246, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.995740532875061, + "total_bits": 336861184, + "q_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03214721009135246, - "qparams": { - "group_size": 128, + }, + "k_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11980855464935303, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10988087207078934, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10201383382081985, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09248267114162445, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05610224977135658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.050053853541612625, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07220055162906647, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06575749069452286, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05718527361750603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04948814958333969, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.048190854489803314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03683188930153847, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03155763819813728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.027117108926177025, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.025977060198783875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.018449172377586365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.014253402128815651, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.01349485944956541, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012947814539074898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01222312357276678, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009643014520406723, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009654153138399124, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008068804629147053, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006487464532256126, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03683188930153847, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03683188930153847, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10497165471315384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09564569592475891, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.08499220758676529, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07719817757606506, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04860248416662216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.041290897876024246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06798765808343887, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.06223266199231148, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04972340911626816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04292153939604759, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.04215984046459198, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03445623815059662, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02959892898797989, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.023526865988969803, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.021880190819501877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01722319982945919, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.012382762506604195, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011351475492119789, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011261338368058205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.010234973393380642, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.009070885367691517, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008801850490272045, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007044029887765646, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0057261064648628235, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03445623815059662, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03445623815059662, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23467493057250977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.2119542807340622, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.200313001871109, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.18057671189308167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10896693170070648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09846409410238266, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.1358121633529663, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12132219225168228, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11163902282714844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09448490291833878, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09125511348247528, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06978527456521988, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05794070288538933, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05229264125227928, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05088786408305168, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03492084890604019, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.026755038648843765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.025650281459093094, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02377169020473957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022831762209534645, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.017904389649629593, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01670745201408863, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015325957909226418, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.01024583075195551, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03492084890604019, - "qparams": { - "group_size": 128, + }, + "v_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03492084890604019, - "qparams": { - "group_size": 128, + }, + "o_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } } - }, - { - "key": "model.layers.26.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19156062602996826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17278847098350525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16687998175621033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14096888899803162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08978520333766937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08315498381853104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10141409933567047, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.0933627113699913, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09147455543279648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07385678589344025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06777443736791611, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0518474280834198, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04475641995668411, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04313505440950394, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04273509606719017, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025900674983859062, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.022217072546482086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021760689094662666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01899006776511669, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018732620403170586, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.013575758785009384, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.01354798674583435, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.01304287277162075, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.009001161903142929, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025900674983859062, - "qparams": { - "group_size": 128, + ], + "model.layers.31.mlp": [ + { + "accuracy": 0.8714317083358765, + "total_bits": 394740800, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.025900674983859062, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.17269256711006165, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.16211679577827454, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15823101997375488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1436377465724945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08198241144418716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07803075015544891, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09184765815734863, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08484168350696564, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08281451463699341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0731440931558609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06984250992536545, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04692090302705765, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.040701691061258316, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03942273184657097, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03911649063229561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02346481755375862, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020428786054253578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.02014152519404888, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01864362321794033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018454788252711296, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012406435795128345, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012589472346007824, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011984650045633316, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008593244478106499, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03911649063229561, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ 6, - 4 + 3, + 2 ], "bits_prop": [ - 0.1, - 0.9 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03911649063229561, - "qparams": { - "group_size": 32, + { + "accuracy": 0.8749791979789734, + "total_bits": 408503360, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 6, - 4 + 3, + 2 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2558918297290802, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.24068158864974976, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.23553906381130219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.21372395753860474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.12153631448745728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11599196493625641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.13540337979793549, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.12498662620782852, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.12265196442604065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10854228585958481, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10354583710432053, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.069099061191082, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.059849776327610016, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05827409029006958, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05790627375245094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03454652801156044, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02989738993346691, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.029513578861951828, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.027235331013798714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02700110152363777, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.018184786662459373, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01791323535144329, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.017658162862062454, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011692393571138382, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03454652801156044, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03454652801156044, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, "bits": [ - 5 + 6, + 3, + 2 ], "bits_prop": [ - 1.0 + 0.05, + 0.2, + 0.75 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.26.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.229839488863945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.20467358827590942, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19312182068824768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17244219779968262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10533557832241058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09464078396558762, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.12716856598854065, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11649826169013977, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10867083072662354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.09028598666191101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.08597540855407715, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06477442383766174, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05591534823179245, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05078589916229248, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04952489957213402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03277353569865227, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.026706870645284653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.026064518839120865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.023766059428453445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.022941570729017258, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.017885863780975342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017565451562404633, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.01625690795481205, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.01220723707228899, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03277353569865227, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.891764760017395, + "total_bits": 456551424, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03277353569865227, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.12316027283668518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.11331630498170853, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.10658661276102066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.09654255956411362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05772746726870537, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.05236983299255371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07218415290117264, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06572188436985016, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.058841533958911896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.05095834285020828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.049319781363010406, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.036847587674856186, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03153093159198761, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.027860986068844795, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.026950819417834282, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.018461041152477264, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01459799986332655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.013973009772598743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.013260929845273495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012676586396992207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.00968116708099842, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009675616398453712, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008344398811459541, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006539102178066969, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.036847587674856186, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.036847587674856186, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.10739545524120331, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.09887145459651947, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.09197734296321869, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.08321479707956314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.050126247107982635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.04494678974151611, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06406684964895248, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05868471413850784, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.05103794485330582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.044328365474939346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.04293329268693924, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03248954564332962, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.028059503063559532, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.024197228252887726, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02321127988398075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.016274580731987953, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.012605033814907074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.011958968825638294, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.011442143470048904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.010826203972101212, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008482846431434155, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008402685634791851, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.007150939200073481, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005514799617230892, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03248954564332962, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03248954564332962, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.27.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23089927434921265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.21194636821746826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.20301984250545502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.18335716426372528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10835061222314835, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09990262985229492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.12853126227855682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.1170789897441864, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11032019555568695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09488298743963242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09098811447620392, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06551075726747513, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0559929758310318, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05193260684609413, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.050957754254341125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032706405967473984, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.026443563401699066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02562900260090828, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02365892007946968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.023002220317721367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.016775215044617653, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.016059212386608124, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015202201902866364, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.009748149663209915, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032706405967473984, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.896644115447998, + "total_bits": 512257024, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.032706405967473984, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, "bits": [ - 5 + 3, + 2 ], "bits_prop": [ - 1.0 + 0.3, + 0.7 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.20216087996959686, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.17864678800106049, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.17023628950119019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14469091594219208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.094822958111763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08583397418260574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10954831540584564, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10040759295225143, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.0967465490102768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07649995386600494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07060523331165314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.05634811148047447, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.04843301698565483, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04590514302253723, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.045294422656297684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02840808779001236, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.024343056604266167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.023745926097035408, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.02080855891108513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020412631332874298, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.015540043823421001, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.015944376587867737, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.014755121432244778, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011676146648824215, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02840808779001236, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.02840808779001236, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9347714185714722, + "total_bits": 577676384, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16853731870651245, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1580449640750885, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.15415802597999573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1397247314453125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.08011507242918015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07613763213157654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.09002909064292908, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.08306775242090225, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.08093078434467316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.07136667519807816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06808846443891525, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04604276269674301, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03991306200623512, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03855675086379051, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03824138268828392, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02304447814822197, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020054297521710396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.019755467772483826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018301256000995636, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018100887537002563, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012269536033272743, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.012465421110391617, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011827374808490276, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008620789274573326, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03855675086379051, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03824138268828392, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, "bits": [ - 6, - 4 + 4, + 3 ], "bits_prop": [ - 0.1, - 0.9 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2481362372636795, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.2332562804222107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.22808125615119934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.20690955221652985, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.11792434006929398, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.11249946802854538, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.1315278708934784, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.1214105561375618, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11906604468822479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.10522632300853729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.10038837045431137, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.06712406128644943, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.0581597201526165, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.05657554417848587, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05620064213871956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03353831544518471, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.029048901051282883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.028663601726293564, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.02644895575940609, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.02620764635503292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01759195327758789, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.017449703067541122, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.01704982854425907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011423347517848015, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03353831544518471, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.03353831544518471, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9410362243652344, + "total_bits": 592151552, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.22970585525035858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.20370018482208252, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.19144025444984436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.17095209658145905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.10510201752185822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.09385985881090164, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.1287529170513153, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.11715501546859741, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.10859264433383942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08979537338018417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.0857527107000351, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.06547548621892929, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.056119248270988464, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.05066290497779846, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04931170120835304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03303416073322296, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.026607835665345192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.025927811861038208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02361074835062027, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.0227337833493948, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.01795310713350773, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.017517950385808945, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.016131121665239334, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012104535475373268, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03303416073322296, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 5 + 4, + 3 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03303416073322296, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, "bits": [ - 5 + 8, + 4, + 3 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11540376394987106, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10593491047620773, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09736130386590958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08823870867490768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05402972176671028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04765070974826813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.07179656624794006, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06485253572463989, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.055076949298381805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.047699373215436935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04674571380019188, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0366959348320961, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03108811192214489, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02615540847182274, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.024870701134204865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.018367968499660492, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.013791359029710293, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012943064793944359, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012549237348139286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011739829555153847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009679596871137619, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009511222131550312, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007901212200522423, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006399047560989857, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0366959348320961, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0366959348320961, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.28.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09877195954322815, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.0899503231048584, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.0781656801700592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0710907056927681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.0456559956073761, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03772947937250137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06682255119085312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.060666147619485855, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04680398851633072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04046016186475754, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0401383712887764, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03371771425008774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.028932195156812668, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.022156234830617905, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02025638148188591, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.016918158158659935, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011708405800163746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010530938394367695, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010688216425478458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.00950216967612505, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008840436115860939, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008570762351155281, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006512047722935677, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005511659197509289, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03772947937250137, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9476466178894043, + "total_bits": 636733536, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 6, + 4, 3 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03772947937250137, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, "bits": [ - 6, + 4, 3 ], "bits_prop": [ - 0.2, - 0.8 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23414334654808044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.21117277443408966, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19805216789245605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17860443890094757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.1088055968284607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09748829901218414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13708871603012085, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12275024503469467, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11153218150138855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09412477910518646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09114251285791397, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.0704227089881897, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.058745015412569046, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05229298025369644, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.050663724541664124, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03533336520195007, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.026959996670484543, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.025743907317519188, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02398289553821087, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022942597046494484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.018184278160333633, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.017267098650336266, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.015342055819928646, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010969946160912514, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03533336520195007, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03533336520195007, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9654759764671326, + "total_bits": 728020576, + "gate_proj": { + "group_size": { + "4": 128 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.21450118720531464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.18124882876873016, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.16917820274829865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.14555774629116058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.09917859733104706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.08728785812854767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.1177215576171875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.10744893550872803, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.10179218649864197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.07680131494998932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.07303254306316376, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.06035967543721199, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.05164036154747009, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.047858692705631256, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04694029688835144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030268436297774315, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.025228705257177353, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.02435578778386116, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.020911268889904022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.020264659076929092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.01632099039852619, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.016506381332874298, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.015109517611563206, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.011706520803272724, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030268436297774315, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.030268436297774315, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15696445107460022, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1469581425189972, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.14302770793437958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12959256768226624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07454584538936615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07067865878343582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08422692120075226, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07771643996238708, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07537780702114105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.0663076639175415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06326711177825928, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.0430309921503067, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03733201324939728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03589669242501259, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03555546700954437, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02154523879289627, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.01866062916815281, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.018360041081905365, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.017008109018206596, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016796531155705452, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.01142764650285244, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011631214059889317, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010953640565276146, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.008020911365747452, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03733201324939728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03733201324939728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.28.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.2296334058046341, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.21543698012828827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.21038483083248138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.19069941341876984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.10907885432243347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.10384789109230042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.12207917124032974, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.11272436380386353, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.11017654836177826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.09719918668270111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.09265950322151184, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.062447208911180496, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.05405006185173988, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.052412401884794235, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.05202042683959007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031248249113559723, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.027054082602262497, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026678701862692833, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024638473987579346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024397069588303566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016538143157958984, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.01650979369878769, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015981676056981087, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011087425984442234, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031248249113559723, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9690861701965332, + "total_bits": 738755584, + "gate_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.031248249113559723, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "4": 32 + }, "bits": [ - 5 + 4 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2060396373271942, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.181496262550354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.16944929957389832, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15085789561271667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09427372366189957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.08342226594686508, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11621483415365219, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10614340752363205, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09757041186094284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.07990799844264984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07627253234386444, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05940887704491615, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.051108263432979584, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.045708660036325455, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.04434337094426155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03011518344283104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02444174513220787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02375638671219349, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.021705860272049904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020832650363445282, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.016601327806711197, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016696369275450706, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014817183837294579, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012093517929315567, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03011518344283104, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.95 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.03011518344283104, - "qparams": { - "group_size": 128, + { + "accuracy": 0.966336727142334, + "total_bits": 750822496, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1119927316904068, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10280696302652359, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09513181447982788, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08615024387836456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.05220608785748482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04650898650288582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.06978306174278259, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.061871349811553955, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05337304621934891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.046220965683460236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04523112624883652, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03560733050107956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.029653459787368774, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.025239186361432076, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02410363033413887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017860783264040947, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01327588316053152, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012546810321509838, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012095680460333824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.01138847041875124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009348869323730469, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009056990034878254, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.00758269103243947, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006098438519984484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03560733050107956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.03560733050107956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09718233346939087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08970154821872711, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.0811433419585228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.07358133047819138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.045257482677698135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0393819659948349, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06230185553431511, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.0563790425658226, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04614977538585663, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.04027809947729111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.03953098505735397, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03165297210216522, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.026937710121273994, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.021878719329833984, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.02051260508596897, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01583121344447136, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011474241502583027, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010602797381579876, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010492830537259579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009622441604733467, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008292041718959808, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008020983077585697, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.00649255933240056, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005165742710232735, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03165297210216522, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03165297210216522, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.25238460302352905, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.23273515701293945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.2231483906507492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.201833575963974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11878928542137146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.10996495932340622, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.14180706441402435, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12859901785850525, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.12084305286407471, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.10461683571338654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.10056986659765244, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.07262688130140305, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.0615893229842186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.057045828551054, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05591784045100212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03644578531384468, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.029214072972536087, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02833867445588112, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.02630026452243328, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.02556719072163105, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.018755605444312096, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.018007492646574974, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.016791611909866333, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.011318270117044449, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03644578531384468, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03644578531384468, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9711299538612366, + "total_bits": 765297664, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.19119569659233093, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.16096040606498718, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1492578238248825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.13189536333084106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.08833414316177368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.07583016902208328, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.10768158733844757, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.09630830585956573, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.09128821641206741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06944878399372101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.06679055839776993, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.055417850613594055, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.046366170048713684, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.04270263761281967, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.04180155321955681, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027767345309257507, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.022489134222269058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.021583952009677887, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.018813010305166245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.018203632906079292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.014801101759076118, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.014758259989321232, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.013496431522071362, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010422829538583755, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027767345309257507, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, "bits": [ - 5 + 5, + 4 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.027767345309257507, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, "bits": [ - 5 + 8, + 5, + 4 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.15388746559619904, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14390897750854492, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1399255096912384, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.126764714717865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07305508852005005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06914712488651276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08274520188570023, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07631208747625351, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07389645278453827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06492013484239578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.061976004391908646, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.042332723736763, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03668070584535599, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.035213395953178406, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03485696017742157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021199515089392662, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.018401477485895157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.018093010410666466, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.016776112839579582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.016554363071918488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.011325347237288952, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.011608378030359745, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010830453597009182, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.00814451090991497, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03668070584535599, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03668070584535599, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.29.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.20402398705482483, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.19119080901145935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.18650154769420624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1690361648797989, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.09763187170028687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.09283037483692169, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.10955685377120972, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.10115346312522888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.09864699095487595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.08702556788921356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.083157978951931, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.05667047202587128, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.04948258399963379, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.047901056706905365, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.04753950983285904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02857215516269207, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.02644910104572773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.026119021698832512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.024488674476742744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.024267619475722313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.016143478453159332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.018387002870440483, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.015657998621463776, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.014816355891525745, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02857215516269207, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.9825085401535034, + "total_bits": 923968608, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.02857215516269207, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 + 0.25, + 0.75 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.2072651982307434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.1834327131509781, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.1723218411207199, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.15230457484722137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.09518592059612274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.0849943608045578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.11603394895792007, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.10565811395645142, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.09805550426244736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.08035334944725037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.07615283876657486, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.05893181264400482, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.05066078528761864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.0459565594792366, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.044796172529459, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029620138928294182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.02429916337132454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.02371029742062092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.02143070660531521, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.020677294582128525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.015935305505990982, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.016153233125805855, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.014360045082867146, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.011440644972026348, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029620138928294182, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, "bits": [ + 8, + 6, 5 ], "bits_prop": [ - 1.0 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.029620138928294182, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9840781688690186, + "total_bits": 938443776, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ + 6, 5 ], "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.11001874506473541, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.10061806440353394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.09134455770254135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.08269139379262924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.0513625405728817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04474852606654167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.070174440741539, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06304656714200974, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05245964229106903, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04520241543650627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.044524576514959335, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0358763225376606, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.030236851423978806, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.024950003251433372, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.023537924513220787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.017991088330745697, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.01326589286327362, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.012380770407617092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.012068607844412327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.011201461777091026, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009503090754151344, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.009373573586344719, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007564092054963112, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.0064312126487493515, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0358763225376606, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.0358763225376606, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09766630083322525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08884325623512268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.07681974768638611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0697205513715744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.045117009431123734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03705954924225807, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06705080717802048, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.060409415513277054, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04631398990750313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.039963219314813614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.039705440402030945, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03414427861571312, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.02892552874982357, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02193361520767212, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01994941756129265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01711410842835903, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011646457947790623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010457095690071583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.010643307119607925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009425709955394268, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.00892393197864294, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.00863046757876873, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006434679962694645, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.0056418776512146, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03705954924225807, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, "bits": [ 6, - 3 + 5 ], "bits_prop": [ - 0.2, - 0.8 + 0.25, + 0.75 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03705954924225807, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, "bits": [ + 8, 6, - 3 + 5 ], "bits_prop": [ - 0.2, - 0.8 + 0.05, + 0.1, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.30.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.22731733322143555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.20390953123569489, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.18996301293373108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17124433815479279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.10525264590978622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.09347588568925858, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.13531844317913055, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12082498520612717, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.10800950974225998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09079725295305252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.08828308433294296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.06972986459732056, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.05779029428958893, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.05072291940450668, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.04889696091413498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03513824939727783, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.026222888380289078, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.024878885596990585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.023311611264944077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022104568779468536, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.018175048753619194, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.01709645427763462, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.014955064281821251, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.010907317511737347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03513824939727783, - "qparams": { - "group_size": 128, + }, + { + "accuracy": 0.988966703414917, + "total_bits": 1068803168, + "gate_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.03513824939727783, - "qparams": { - "group_size": 128, + }, + "up_proj": { + "group_size": { + "6": 128 + }, "bits": [ - 5 + 6 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.1591838300228119, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.1415223330259323, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.1360490918159485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.11488793790340424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.07468128949403763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.06827343255281448, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.085018090903759, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.07735458761453629, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.075693279504776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.06029881536960602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.055544476956129074, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.043701525777578354, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03730108216404915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.03607785701751709, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.03579885885119438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.021907465532422066, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.019055623561143875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.018700333312153816, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01628078520298004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.016079731285572052, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.011880101636052132, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.012272227555513382, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.011486358940601349, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.008974846452474594, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03730108216404915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.03730108216404915, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1532525271177292, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.14343804121017456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1396031230688095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.12663781642913818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07331185042858124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06946975737810135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08277121186256409, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.0763159692287445, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07404714077711105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06517621874809265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06238602474331856, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.042789705097675323, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03731026500463486, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.035968877375125885, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.035644423216581345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.021620795130729675, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.019796248525381088, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01952780969440937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018307644873857498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.018119890242815018, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012205557897686958, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.013751058839261532, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.011782762594521046, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011014026589691639, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03731026500463486, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03731026500463486, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 0.05, + 0.95 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.30.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.16076304018497467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.1506546586751938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.1469176858663559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1332099437713623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07687477767467499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.07308559864759445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.08639097213745117, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07966648042201996, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07764586806297302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06845302134752274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.06549456715583801, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.04465290904045105, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03882627189159393, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.037588104605674744, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03729712590575218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.022510332986712456, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.020532239228487015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.020270563662052155, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.018962036818265915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01878969743847847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.012637749314308167, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.014041556976735592, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.012248044833540916, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.011108364909887314, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03882627189159393, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.037588104605674744, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9899485111236572, + "total_bits": 1109664000, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 5, - 4 + 8, + 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.17596136033535004, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.15571346879005432, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.14729252457618713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.12889216840267181, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.08116742223501205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.07318858802318573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.09855509549379349, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.08804711699485779, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.08316241204738617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.06801062077283859, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.06424757838249207, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.04993603006005287, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.04286201670765877, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.03978181630373001, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.03900958597660065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.02551143616437912, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.021941883489489555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.021558323875069618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.019552506506443024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.019085204228758812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.014635489322245121, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.015491152182221413, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.013668665662407875, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.012229178100824356, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.03900958597660065, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 4 + 8, + 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.03900958597660065, - "qparams": { - "group_size": 32, + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 4 + 8, + 6 ], "bits_prop": [ - 0.1, - 0.9 + 0.15, + 0.85 ], "scale_bits": 4 } - } - }, - { - "key": "model.layers.31.self_attn.q_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.10609333962202072, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.09668552130460739, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.087825246155262, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07950422167778015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.04941534250974655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.04299376904964447, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.0673164501786232, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.06054253876209259, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.05059179291129112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.04345640167593956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.04267892986536026, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034375641494989395, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02904845029115677, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.02394936978816986, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.022610358893871307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.01722540333867073, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.012684681452810764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.011850867420434952, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.01151732262223959, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.010689279064536095, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.009076335467398167, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.008903292007744312, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.007240488193929195, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.006040247157216072, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034375641494989395, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.034375641494989395, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.k_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.09650260955095291, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.08769647032022476, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.0763181522488594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.0692908838391304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.04462414234876633, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03696311637759209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.06603404134511948, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.05906321480870247, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.04583335295319557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.039368581026792526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.0391380675137043, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.03351346030831337, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.028208445757627487, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.02168242074549198, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.01985469087958336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.01678851619362831, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.011524268426001072, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.010413643904030323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.01050635427236557, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.009370340034365654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.008805602788925171, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.008476436138153076, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.006469935644418001, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.005586930084973574, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03696311637759209, - "qparams": { - "group_size": 32, + }, + { + "accuracy": 0.9909499883651733, + "total_bits": 1209278720, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 3 + 8, + 6 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.03696311637759209, - "qparams": { - "group_size": 32, + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, "bits": [ - 6, - 3 + 8, + 6 ], "bits_prop": [ - 0.2, - 0.8 + 0.1, + 0.9 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.v_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.18896484375, - "total_bits": 9181184.0, - "err": 0.23935307562351227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37646484375, - "total_bits": 9967616.0, - "err": 0.212103009223938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62646484375, - "total_bits": 11016192.0, - "err": 0.19582919776439667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72021484375, - "total_bits": 11409408.0, - "err": 0.17656318843364716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22021484375, - "total_bits": 13506560.0, - "err": 0.11070319265127182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.68896484375, - "total_bits": 15472640.0, - "err": 0.0967671275138855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0316162109375, - "total_bits": 12715520.0, - "err": 0.14305314421653748, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12646484375, - "total_bits": 13113344.0, - "err": 0.12850801646709442, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.18896484375, - "total_bits": 13375488.0, - "err": 0.11422160267829895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.53271484375, - "total_bits": 14817280.0, - "err": 0.09455414116382599, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.656982421875, - "total_bits": 15338496.0, - "err": 0.09194377064704895, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0316162109375, - "total_bits": 16909824.0, - "err": 0.07361013442277908, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12646484375, - "total_bits": 17307648.0, - "err": 0.06154417619109154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22021484375, - "total_bits": 17700864.0, - "err": 0.053287893533706665, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.31396484375, - "total_bits": 18094080.0, - "err": 0.05114767700433731, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.037008076906204224, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22021484375, - "total_bits": 21895168.0, - "err": 0.027506671845912933, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.37646484375, - "total_bits": 22550528.0, - "err": 0.02596958354115486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.53271484375, - "total_bits": 23205888.0, - "err": 0.024220837280154228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72021484375, - "total_bits": 23992320.0, - "err": 0.022829636931419373, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0316162109375, - "total_bits": 25298432.0, - "err": 0.019031334668397903, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12646484375, - "total_bits": 25696256.0, - "err": 0.017956769093871117, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2191162109375, - "total_bits": 26084864.0, - "err": 0.01563938520848751, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12646484375, - "total_bits": 34084864.0, - "err": 0.011267753317952156, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.037008076906204224, - "qparams": { - "group_size": 128, + }, + "down_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 } }, - "best_option": { - "desc": "1.0:5b 128g s4", - "bpw": 5.0316162109375, - "total_bits": 21104128.0, - "err": 0.037008076906204224, - "qparams": { - "group_size": 128, + { + "accuracy": 0.9934093952178955, + "total_bits": 1414799616, + "gate_proj": { + "group_size": { + "8": 128 + }, "bits": [ - 5 + 8 ], "bits_prop": [ - 1.0 + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.o_proj", - "numel": 16777216, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1878662109375, - "total_bits": 36706304.0, - "err": 0.09778688102960587, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 39852032.0, - "err": 0.08688285201787949, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 44046336.0, - "err": 0.08323801308870316, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7191162109375, - "total_bits": 45619200.0, - "err": 0.07091998308897018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2191162109375, - "total_bits": 54007808.0, - "err": 0.044609811156988144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6878662109375, - "total_bits": 61872128.0, - "err": 0.040664710104465485, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 50857472.0, - "err": 0.05288160219788551, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 52434944.0, - "err": 0.04666808620095253, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1878662109375, - "total_bits": 53483520.0, - "err": 0.045215755701065063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.036175113171339035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.65643310546875, - "total_bits": 61344768.0, - "err": 0.03425614535808563, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 67634688.0, - "err": 0.02681676484644413, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 69212160.0, - "err": 0.02407808043062687, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2191162109375, - "total_bits": 70785024.0, - "err": 0.023145180195569992, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3128662109375, - "total_bits": 72357888.0, - "err": 0.02294161729514599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 84411904.0, - "err": 0.014218165539205074, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2191162109375, - "total_bits": 87562240.0, - "err": 0.014399238862097263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3753662109375, - "total_bits": 90183680.0, - "err": 0.014205621555447578, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5316162109375, - "total_bits": 92805120.0, - "err": 0.013116263784468174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.7191162109375, - "total_bits": 95950848.0, - "err": 0.012997329235076904, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 101189120.0, - "err": 0.008869977667927742, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 102766592.0, - "err": 0.011588957160711288, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218841552734375, - "total_bits": 104334848.0, - "err": 0.008619317784905434, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 136321024.0, - "err": 0.010447136126458645, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.036175113171339035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5316162109375, - "total_bits": 59250688.0, - "err": 0.036175113171339035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.gate_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.1478649526834488, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.13875943422317505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.13537149131298065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.1225595623254776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.07026062905788422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.06680795550346375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.07905074954032898, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.07279547303915024, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.07093758136034012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.06252370029687881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.05970107018947601, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.040365565568208694, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03492264449596405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.03376002237200737, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.03348693251609802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.020196236670017242, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.017460471019148827, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.01720334030687809, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.01589779555797577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.015721075236797333, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.010643996298313141, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.010721899569034576, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.010248142294585705, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.007250812835991383, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03492264449596405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.03492264449596405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 1 ], "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.up_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1876046316964284, - "total_bits": 128456703.99999999, - "err": 0.09879011660814285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751046316964284, - "total_bits": 139466752.0, - "err": 0.09241506457328796, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251046316964284, - "total_bits": 154146816.0, - "err": 0.09008882939815521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7188546316964284, - "total_bits": 159651840.0, - "err": 0.08154222369194031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2188546316964284, - "total_bits": 189011968.0, - "err": 0.04686041921377182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.6876046316964284, - "total_bits": 216537088.0, - "err": 0.044504180550575256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031276157924107, - "total_bits": 177997312.0, - "err": 0.05282922089099884, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251046316964284, - "total_bits": 183506944.0, - "err": 0.04861709102988243, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1876046316964284, - "total_bits": 187176960.0, - "err": 0.04734968766570091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5313546316964284, - "total_bits": 207362048.0, - "err": 0.041646379977464676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6563023158482144, - "total_bits": 214699008.0, - "err": 0.03977698087692261, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.027075989171862602, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125104631696429, - "total_bits": 242227200.0, - "err": 0.023402567952871323, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.218854631696429, - "total_bits": 247732224.0, - "err": 0.022614866495132446, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.312604631696429, - "total_bits": 253237248.0, - "err": 0.02243170514702797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031276157924107, - "total_bits": 295437824.0, - "err": 0.013600807636976242, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.218854631696429, - "total_bits": 306452480.0, - "err": 0.011904506012797356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375104631696429, - "total_bits": 315627520.0, - "err": 0.011735230684280396, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.531354631696429, - "total_bits": 324802560.0, - "err": 0.010879219509661198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.718854631696429, - "total_bits": 335812608.0, - "err": 0.01076147798448801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031276157924107, - "total_bits": 354158080.0, - "err": 0.00740289781242609, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125104631696429, - "total_bits": 359667712.0, - "err": 0.0076034036464989185, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.218776157924107, - "total_bits": 365168128.0, - "err": 0.007147919852286577, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125104631696429, - "total_bits": 477108224.0, - "err": 0.005490007810294628, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.027075989171862602, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031276157924107, - "total_bits": 236717567.99999997, - "err": 0.027075989171862602, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.down_proj", - "numel": 58720256, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1789376395089284, - "total_bits": 127947775.99999999, - "err": 0.10863684117794037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3753662109375, - "total_bits": 139482112.0, - "err": 0.09760189056396484, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6253662109375, - "total_bits": 154162176.0, - "err": 0.09256364405155182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7235804966517856, - "total_bits": 159929344.0, - "err": 0.0809030830860138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2235804966517856, - "total_bits": 189289472.0, - "err": 0.05059179663658142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7146519252232144, - "total_bits": 218125312.0, - "err": 0.04598020389676094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031341552734375, - "total_bits": 178001152.0, - "err": 0.06207561865448952, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1253662109375, - "total_bits": 183522304.0, - "err": 0.05534223094582558, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1789376395089284, - "total_bits": 186668032.0, - "err": 0.0517030768096447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5271519252232144, - "total_bits": 207115264.0, - "err": 0.04294868931174278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6608973911830356, - "total_bits": 214968832.0, - "err": 0.040814194828271866, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03198036551475525, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1253662109375, - "total_bits": 242242560.0, - "err": 0.02709018997848034, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.223580496651786, - "total_bits": 248009728.0, - "err": 0.024953555315732956, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.321794782366071, - "total_bits": 253776896.0, - "err": 0.024406827986240387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031341552734375, - "total_bits": 295441664.0, - "err": 0.016476556658744812, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.223580496651786, - "total_bits": 306729984.0, - "err": 0.01399523951113224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.339651925223214, - "total_bits": 313545728.0, - "err": 0.013747838325798512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.527151925223214, - "total_bits": 324555776.0, - "err": 0.01264280267059803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.723580496651786, - "total_bits": 336090112.0, - "err": 0.01233109924942255, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031341552734375, - "total_bits": 354161920.0, - "err": 0.009657971560955048, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1253662109375, - "total_bits": 359683072.0, - "err": 0.010158034972846508, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.227770124162946, - "total_bits": 365696256.0, - "err": 0.008971517905592918, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.1253662109375, - "total_bits": 477123584.0, - "err": 0.008221769705414772, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03198036551475525, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:4b 128g s4", - "bpw": 4.031341552734375, - "total_bits": 236721408.0, - "err": 0.03198036551475525, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 + "bits_prop": [ + 1 ], "scale_bits": 4 } } - } - ], - "last_module_idx": 66, - "base_perplexity": 6.030173417331406 + ], + "model.norm.norm": null, + "lm_head.linear": null + }, + "last_module_idx": 66 } \ No newline at end of file